.text


.globl  aesni_cbc_sha256_enc
.type   aesni_cbc_sha256_enc,@function
.align  16
aesni_cbc_sha256_enc:
.cfi_startproc
        leaq    OPENSSL_ia32cap_P(%rip),%r11
        movl    $1,%eax
        cmpq    $0,%rdi
        je      .Lprobe
        movl    0(%r11),%eax
        movq    4(%r11),%r10
        btq     $61,%r10
        jc      aesni_cbc_sha256_enc_shaext
        movq    %r10,%r11
        shrq    $32,%r11

        testl   $2048,%r10d
        jnz     aesni_cbc_sha256_enc_xop
        andl    $296,%r11d
        cmpl    $296,%r11d
        je      aesni_cbc_sha256_enc_avx2
        andl    $268435456,%r10d
        jnz     aesni_cbc_sha256_enc_avx
        ud2
        xorl    %eax,%eax
        cmpq    $0,%rdi
        je      .Lprobe
        ud2
.Lprobe:
        .byte   0xf3,0xc3
.cfi_endproc
.size   aesni_cbc_sha256_enc,.-aesni_cbc_sha256_enc

.align  64
.type   K256,@object
K256:
.long   0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.long   0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.long   0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.long   0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.long   0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.long   0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.long   0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.long   0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.long   0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.long   0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.long   0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.long   0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.long   0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.long   0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.long   0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.long   0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.long   0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.long   0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.long   0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.long   0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.long   0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.long   0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.long   0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.long   0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.long   0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.long   0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.long   0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.long   0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.long   0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.long   0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.long   0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.long   0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2

.long   0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
.long   0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
.long   0,0,0,0,   0,0,0,0,   -1,-1,-1,-1
.long   0,0,0,0,   0,0,0,0
.byte   65,69,83,78,73,45,67,66,67,43,83,72,65,50,53,54,32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align  64
.type   aesni_cbc_sha256_enc_xop,@function
.align  64
aesni_cbc_sha256_enc_xop:
.cfi_startproc
.Lxop_shortcut:
        movq    8(%rsp),%r10
        movq    %rsp,%rax
.cfi_def_cfa_register   %rax
        pushq   %rbx
.cfi_offset     %rbx,-16
        pushq   %rbp
.cfi_offset     %rbp,-24
        pushq   %r12
.cfi_offset     %r12,-32
        pushq   %r13
.cfi_offset     %r13,-40
        pushq   %r14
.cfi_offset     %r14,-48
        pushq   %r15
.cfi_offset     %r15,-56
        subq    $128,%rsp
        andq    $-64,%rsp

        shlq    $6,%rdx
        subq    %rdi,%rsi
        subq    %rdi,%r10
        addq    %rdi,%rdx


        movq    %rsi,64+8(%rsp)
        movq    %rdx,64+16(%rsp)

        movq    %r8,64+32(%rsp)
        movq    %r9,64+40(%rsp)
        movq    %r10,64+48(%rsp)
        movq    %rax,120(%rsp)
.cfi_escape     0x0f,0x06,0x77,0xf8,0x00,0x06,0x23,0x08
.Lprologue_xop:
        vzeroall

        movq    %rdi,%r12
        leaq    128(%rcx),%rdi
        leaq    K256+544(%rip),%r13
        movl    240-128(%rdi),%r14d
        movq    %r9,%r15
        movq    %r10,%rsi
        vmovdqu (%r8),%xmm8
        subq    $9,%r14

        movl    0(%r15),%eax
        movl    4(%r15),%ebx
        movl    8(%r15),%ecx
        movl    12(%r15),%edx
        movl    16(%r15),%r8d
        movl    20(%r15),%r9d
        movl    24(%r15),%r10d
        movl    28(%r15),%r11d

        vmovdqa 0(%r13,%r14,8),%xmm14
        vmovdqa 16(%r13,%r14,8),%xmm13
        vmovdqa 32(%r13,%r14,8),%xmm12
        vmovdqu 0-128(%rdi),%xmm10
        jmp     .Lloop_xop
.align  16
.Lloop_xop:
        vmovdqa K256+512(%rip),%xmm7
        vmovdqu 0(%rsi,%r12,1),%xmm0
        vmovdqu 16(%rsi,%r12,1),%xmm1
        vmovdqu 32(%rsi,%r12,1),%xmm2
        vmovdqu 48(%rsi,%r12,1),%xmm3
        vpshufb %xmm7,%xmm0,%xmm0
        leaq    K256(%rip),%rbp
        vpshufb %xmm7,%xmm1,%xmm1
        vpshufb %xmm7,%xmm2,%xmm2
        vpaddd  0(%rbp),%xmm0,%xmm4
        vpshufb %xmm7,%xmm3,%xmm3
        vpaddd  32(%rbp),%xmm1,%xmm5
        vpaddd  64(%rbp),%xmm2,%xmm6
        vpaddd  96(%rbp),%xmm3,%xmm7
        vmovdqa %xmm4,0(%rsp)
        movl    %eax,%r14d
        vmovdqa %xmm5,16(%rsp)
        movl    %ebx,%esi
        vmovdqa %xmm6,32(%rsp)
        xorl    %ecx,%esi
        vmovdqa %xmm7,48(%rsp)
        movl    %r8d,%r13d
        jmp     .Lxop_00_47

.align  16
.Lxop_00_47:
        subq    $-32*4,%rbp
        vmovdqu (%r12),%xmm9
        movq    %r12,64+0(%rsp)
        vpalignr        $4,%xmm0,%xmm1,%xmm4
        rorl    $14,%r13d
        movl    %r14d,%eax
        vpalignr        $4,%xmm2,%xmm3,%xmm7
        movl    %r9d,%r12d
        xorl    %r8d,%r13d
.byte   143,232,120,194,236,14
        rorl    $9,%r14d
        xorl    %r10d,%r12d
        vpsrld  $3,%xmm4,%xmm4
        rorl    $5,%r13d
        xorl    %eax,%r14d
        vpaddd  %xmm7,%xmm0,%xmm0
        andl    %r8d,%r12d
        vpxor   %xmm10,%xmm9,%xmm9
        vmovdqu 16-128(%rdi),%xmm10
        xorl    %r8d,%r13d
        addl    0(%rsp),%r11d
        movl    %eax,%r15d
.byte   143,232,120,194,245,11
        rorl    $11,%r14d
        xorl    %r10d,%r12d
        vpxor   %xmm5,%xmm4,%xmm4
        xorl    %ebx,%r15d
        rorl    $6,%r13d
        addl    %r12d,%r11d
        andl    %r15d,%esi
.byte   143,232,120,194,251,13
        xorl    %eax,%r14d
        addl    %r13d,%r11d
        vpxor   %xmm6,%xmm4,%xmm4
        xorl    %ebx,%esi
        addl    %r11d,%edx
        vpsrld  $10,%xmm3,%xmm6
        rorl    $2,%r14d
        addl    %esi,%r11d
        vpaddd  %xmm4,%xmm0,%xmm0
        movl    %edx,%r13d
        addl    %r11d,%r14d
.byte   143,232,120,194,239,2
        rorl    $14,%r13d
        movl    %r14d,%r11d
        vpxor   %xmm6,%xmm7,%xmm7
        movl    %r8d,%r12d
        xorl    %edx,%r13d
        rorl    $9,%r14d
        xorl    %r9d,%r12d
        vpxor   %xmm5,%xmm7,%xmm7
        rorl    $5,%r13d
        xorl    %r11d,%r14d
        andl    %edx,%r12d
        vpxor   %xmm8,%xmm9,%xmm9
        xorl    %edx,%r13d
        vpsrldq $8,%xmm7,%xmm7
        addl    4(%rsp),%r10d
        movl    %r11d,%esi
        rorl    $11,%r14d
        xorl    %r9d,%r12d
        vpaddd  %xmm7,%xmm0,%xmm0
        xorl    %eax,%esi
        rorl    $6,%r13d
        addl    %r12d,%r10d
        andl    %esi,%r15d
.byte   143,232,120,194,248,13
        xorl    %r11d,%r14d
        addl    %r13d,%r10d
        vpsrld  $10,%xmm0,%xmm6
        xorl    %eax,%r15d
        addl    %r10d,%ecx
.byte   143,232,120,194,239,2
        rorl    $2,%r14d
        addl    %r15d,%r10d
        vpxor   %xmm6,%xmm7,%xmm7
        movl    %ecx,%r13d
        addl    %r10d,%r14d
        rorl    $14,%r13d
        movl    %r14d,%r10d
        vpxor   %xmm5,%xmm7,%xmm7
        movl    %edx,%r12d
        xorl    %ecx,%r13d
        rorl    $9,%r14d
        xorl    %r8d,%r12d
        vpslldq $8,%xmm7,%xmm7
        rorl    $5,%r13d
        xorl    %r10d,%r14d
        andl    %ecx,%r12d
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 32-128(%rdi),%xmm10
        xorl    %ecx,%r13d
        vpaddd  %xmm7,%xmm0,%xmm0
        addl    8(%rsp),%r9d
        movl    %r10d,%r15d
        rorl    $11,%r14d
        xorl    %r8d,%r12d
        vpaddd  0(%rbp),%xmm0,%xmm6
        xorl    %r11d,%r15d
        rorl    $6,%r13d
        addl    %r12d,%r9d
        andl    %r15d,%esi
        xorl    %r10d,%r14d
        addl    %r13d,%r9d
        xorl    %r11d,%esi
        addl    %r9d,%ebx
        rorl    $2,%r14d
        addl    %esi,%r9d
        movl    %ebx,%r13d
        addl    %r9d,%r14d
        rorl    $14,%r13d
        movl    %r14d,%r9d
        movl    %ecx,%r12d
        xorl    %ebx,%r13d
        rorl    $9,%r14d
        xorl    %edx,%r12d
        rorl    $5,%r13d
        xorl    %r9d,%r14d
        andl    %ebx,%r12d
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 48-128(%rdi),%xmm10
        xorl    %ebx,%r13d
        addl    12(%rsp),%r8d
        movl    %r9d,%esi
        rorl    $11,%r14d
        xorl    %edx,%r12d
        xorl    %r10d,%esi
        rorl    $6,%r13d
        addl    %r12d,%r8d
        andl    %esi,%r15d
        xorl    %r9d,%r14d
        addl    %r13d,%r8d
        xorl    %r10d,%r15d
        addl    %r8d,%eax
        rorl    $2,%r14d
        addl    %r15d,%r8d
        movl    %eax,%r13d
        addl    %r8d,%r14d
        vmovdqa %xmm6,0(%rsp)
        vpalignr        $4,%xmm1,%xmm2,%xmm4
        rorl    $14,%r13d
        movl    %r14d,%r8d
        vpalignr        $4,%xmm3,%xmm0,%xmm7
        movl    %ebx,%r12d
        xorl    %eax,%r13d
.byte   143,232,120,194,236,14
        rorl    $9,%r14d
        xorl    %ecx,%r12d
        vpsrld  $3,%xmm4,%xmm4
        rorl    $5,%r13d
        xorl    %r8d,%r14d
        vpaddd  %xmm7,%xmm1,%xmm1
        andl    %eax,%r12d
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 64-128(%rdi),%xmm10
        xorl    %eax,%r13d
        addl    16(%rsp),%edx
        movl    %r8d,%r15d
.byte   143,232,120,194,245,11
        rorl    $11,%r14d
        xorl    %ecx,%r12d
        vpxor   %xmm5,%xmm4,%xmm4
        xorl    %r9d,%r15d
        rorl    $6,%r13d
        addl    %r12d,%edx
        andl    %r15d,%esi
.byte   143,232,120,194,248,13
        xorl    %r8d,%r14d
        addl    %r13d,%edx
        vpxor   %xmm6,%xmm4,%xmm4
        xorl    %r9d,%esi
        addl    %edx,%r11d
        vpsrld  $10,%xmm0,%xmm6
        rorl    $2,%r14d
        addl    %esi,%edx
        vpaddd  %xmm4,%xmm1,%xmm1
        movl    %r11d,%r13d
        addl    %edx,%r14d
.byte   143,232,120,194,239,2
        rorl    $14,%r13d
        movl    %r14d,%edx
        vpxor   %xmm6,%xmm7,%xmm7
        movl    %eax,%r12d
        xorl    %r11d,%r13d
        rorl    $9,%r14d
        xorl    %ebx,%r12d
        vpxor   %xmm5,%xmm7,%xmm7
        rorl    $5,%r13d
        xorl    %edx,%r14d
        andl    %r11d,%r12d
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 80-128(%rdi),%xmm10
        xorl    %r11d,%r13d
        vpsrldq $8,%xmm7,%xmm7
        addl    20(%rsp),%ecx
        movl    %edx,%esi
        rorl    $11,%r14d
        xorl    %ebx,%r12d
        vpaddd  %xmm7,%xmm1,%xmm1
        xorl    %r8d,%esi
        rorl    $6,%r13d
        addl    %r12d,%ecx
        andl    %esi,%r15d
.byte   143,232,120,194,249,13
        xorl    %edx,%r14d
        addl    %r13d,%ecx
        vpsrld  $10,%xmm1,%xmm6
        xorl    %r8d,%r15d
        addl    %ecx,%r10d
.byte   143,232,120,194,239,2
        rorl    $2,%r14d
        addl    %r15d,%ecx
        vpxor   %xmm6,%xmm7,%xmm7
        movl    %r10d,%r13d
        addl    %ecx,%r14d
        rorl    $14,%r13d
        movl    %r14d,%ecx
        vpxor   %xmm5,%xmm7,%xmm7
        movl    %r11d,%r12d
        xorl    %r10d,%r13d
        rorl    $9,%r14d
        xorl    %eax,%r12d
        vpslldq $8,%xmm7,%xmm7
        rorl    $5,%r13d
        xorl    %ecx,%r14d
        andl    %r10d,%r12d
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 96-128(%rdi),%xmm10
        xorl    %r10d,%r13d
        vpaddd  %xmm7,%xmm1,%xmm1
        addl    24(%rsp),%ebx
        movl    %ecx,%r15d
        rorl    $11,%r14d
        xorl    %eax,%r12d
        vpaddd  32(%rbp),%xmm1,%xmm6
        xorl    %edx,%r15d
        rorl    $6,%r13d
        addl    %r12d,%ebx
        andl    %r15d,%esi
        xorl    %ecx,%r14d
        addl    %r13d,%ebx
        xorl    %edx,%esi
        addl    %ebx,%r9d
        rorl    $2,%r14d
        addl    %esi,%ebx
        movl    %r9d,%r13d
        addl    %ebx,%r14d
        rorl    $14,%r13d
        movl    %r14d,%ebx
        movl    %r10d,%r12d
        xorl    %r9d,%r13d
        rorl    $9,%r14d
        xorl    %r11d,%r12d
        rorl    $5,%r13d
        xorl    %ebx,%r14d
        andl    %r9d,%r12d
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 112-128(%rdi),%xmm10
        xorl    %r9d,%r13d
        addl    28(%rsp),%eax
        movl    %ebx,%esi
        rorl    $11,%r14d
        xorl    %r11d,%r12d
        xorl    %ecx,%esi
        rorl    $6,%r13d
        addl    %r12d,%eax
        andl    %esi,%r15d
        xorl    %ebx,%r14d
        addl    %r13d,%eax
        xorl    %ecx,%r15d
        addl    %eax,%r8d
        rorl    $2,%r14d
        addl    %r15d,%eax
        movl    %r8d,%r13d
        addl    %eax,%r14d
        vmovdqa %xmm6,16(%rsp)
        vpalignr        $4,%xmm2,%xmm3,%xmm4
        rorl    $14,%r13d
        movl    %r14d,%eax
        vpalignr        $4,%xmm0,%xmm1,%xmm7
        movl    %r9d,%r12d
        xorl    %r8d,%r13d
.byte   143,232,120,194,236,14
        rorl    $9,%r14d
        xorl    %r10d,%r12d
        vpsrld  $3,%xmm4,%xmm4
        rorl    $5,%r13d
        xorl    %eax,%r14d
        vpaddd  %xmm7,%xmm2,%xmm2
        andl    %r8d,%r12d
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 128-128(%rdi),%xmm10
        xorl    %r8d,%r13d
        addl    32(%rsp),%r11d
        movl    %eax,%r15d
.byte   143,232,120,194,245,11
        rorl    $11,%r14d
        xorl    %r10d,%r12d
        vpxor   %xmm5,%xmm4,%xmm4
        xorl    %ebx,%r15d
        rorl    $6,%r13d
        addl    %r12d,%r11d
        andl    %r15d,%esi
.byte   143,232,120,194,249,13
        xorl    %eax,%r14d
        addl    %r13d,%r11d
        vpxor   %xmm6,%xmm4,%xmm4
        xorl    %ebx,%esi
        addl    %r11d,%edx
        vpsrld  $10,%xmm1,%xmm6
        rorl    $2,%r14d
        addl    %esi,%r11d
        vpaddd  %xmm4,%xmm2,%xmm2
        movl    %edx,%r13d
        addl    %r11d,%r14d
.byte   143,232,120,194,239,2
        rorl    $14,%r13d
        movl    %r14d,%r11d
        vpxor   %xmm6,%xmm7,%xmm7
        movl    %r8d,%r12d
        xorl    %edx,%r13d
        rorl    $9,%r14d
        xorl    %r9d,%r12d
        vpxor   %xmm5,%xmm7,%xmm7
        rorl    $5,%r13d
        xorl    %r11d,%r14d
        andl    %edx,%r12d
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 144-128(%rdi),%xmm10
        xorl    %edx,%r13d
        vpsrldq $8,%xmm7,%xmm7
        addl    36(%rsp),%r10d
        movl    %r11d,%esi
        rorl    $11,%r14d
        xorl    %r9d,%r12d
        vpaddd  %xmm7,%xmm2,%xmm2
        xorl    %eax,%esi
        rorl    $6,%r13d
        addl    %r12d,%r10d
        andl    %esi,%r15d
.byte   143,232,120,194,250,13
        xorl    %r11d,%r14d
        addl    %r13d,%r10d
        vpsrld  $10,%xmm2,%xmm6
        xorl    %eax,%r15d
        addl    %r10d,%ecx
.byte   143,232,120,194,239,2
        rorl    $2,%r14d
        addl    %r15d,%r10d
        vpxor   %xmm6,%xmm7,%xmm7
        movl    %ecx,%r13d
        addl    %r10d,%r14d
        rorl    $14,%r13d
        movl    %r14d,%r10d
        vpxor   %xmm5,%xmm7,%xmm7
        movl    %edx,%r12d
        xorl    %ecx,%r13d
        rorl    $9,%r14d
        xorl    %r8d,%r12d
        vpslldq $8,%xmm7,%xmm7
        rorl    $5,%r13d
        xorl    %r10d,%r14d
        andl    %ecx,%r12d
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 160-128(%rdi),%xmm10
        xorl    %ecx,%r13d
        vpaddd  %xmm7,%xmm2,%xmm2
        addl    40(%rsp),%r9d
        movl    %r10d,%r15d
        rorl    $11,%r14d
        xorl    %r8d,%r12d
        vpaddd  64(%rbp),%xmm2,%xmm6
        xorl    %r11d,%r15d
        rorl    $6,%r13d
        addl    %r12d,%r9d
        andl    %r15d,%esi
        xorl    %r10d,%r14d
        addl    %r13d,%r9d
        xorl    %r11d,%esi
        addl    %r9d,%ebx
        rorl    $2,%r14d
        addl    %esi,%r9d
        movl    %ebx,%r13d
        addl    %r9d,%r14d
        rorl    $14,%r13d
        movl    %r14d,%r9d
        movl    %ecx,%r12d
        xorl    %ebx,%r13d
        rorl    $9,%r14d
        xorl    %edx,%r12d
        rorl    $5,%r13d
        xorl    %r9d,%r14d
        andl    %ebx,%r12d
        vaesenclast     %xmm10,%xmm9,%xmm11
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 176-128(%rdi),%xmm10
        xorl    %ebx,%r13d
        addl    44(%rsp),%r8d
        movl    %r9d,%esi
        rorl    $11,%r14d
        xorl    %edx,%r12d
        xorl    %r10d,%esi
        rorl    $6,%r13d
        addl    %r12d,%r8d
        andl    %esi,%r15d
        xorl    %r9d,%r14d
        addl    %r13d,%r8d
        xorl    %r10d,%r15d
        addl    %r8d,%eax
        rorl    $2,%r14d
        addl    %r15d,%r8d
        movl    %eax,%r13d
        addl    %r8d,%r14d
        vmovdqa %xmm6,32(%rsp)
        vpalignr        $4,%xmm3,%xmm0,%xmm4
        rorl    $14,%r13d
        movl    %r14d,%r8d
        vpalignr        $4,%xmm1,%xmm2,%xmm7
        movl    %ebx,%r12d
        xorl    %eax,%r13d
.byte   143,232,120,194,236,14
        rorl    $9,%r14d
        xorl    %ecx,%r12d
        vpsrld  $3,%xmm4,%xmm4
        rorl    $5,%r13d
        xorl    %r8d,%r14d
        vpaddd  %xmm7,%xmm3,%xmm3
        andl    %eax,%r12d
        vpand   %xmm12,%xmm11,%xmm8
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 192-128(%rdi),%xmm10
        xorl    %eax,%r13d
        addl    48(%rsp),%edx
        movl    %r8d,%r15d
.byte   143,232,120,194,245,11
        rorl    $11,%r14d
        xorl    %ecx,%r12d
        vpxor   %xmm5,%xmm4,%xmm4
        xorl    %r9d,%r15d
        rorl    $6,%r13d
        addl    %r12d,%edx
        andl    %r15d,%esi
.byte   143,232,120,194,250,13
        xorl    %r8d,%r14d
        addl    %r13d,%edx
        vpxor   %xmm6,%xmm4,%xmm4
        xorl    %r9d,%esi
        addl    %edx,%r11d
        vpsrld  $10,%xmm2,%xmm6
        rorl    $2,%r14d
        addl    %esi,%edx
        vpaddd  %xmm4,%xmm3,%xmm3
        movl    %r11d,%r13d
        addl    %edx,%r14d
.byte   143,232,120,194,239,2
        rorl    $14,%r13d
        movl    %r14d,%edx
        vpxor   %xmm6,%xmm7,%xmm7
        movl    %eax,%r12d
        xorl    %r11d,%r13d
        rorl    $9,%r14d
        xorl    %ebx,%r12d
        vpxor   %xmm5,%xmm7,%xmm7
        rorl    $5,%r13d
        xorl    %edx,%r14d
        andl    %r11d,%r12d
        vaesenclast     %xmm10,%xmm9,%xmm11
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 208-128(%rdi),%xmm10
        xorl    %r11d,%r13d
        vpsrldq $8,%xmm7,%xmm7
        addl    52(%rsp),%ecx
        movl    %edx,%esi
        rorl    $11,%r14d
        xorl    %ebx,%r12d
        vpaddd  %xmm7,%xmm3,%xmm3
        xorl    %r8d,%esi
        rorl    $6,%r13d
        addl    %r12d,%ecx
        andl    %esi,%r15d
.byte   143,232,120,194,251,13
        xorl    %edx,%r14d
        addl    %r13d,%ecx
        vpsrld  $10,%xmm3,%xmm6
        xorl    %r8d,%r15d
        addl    %ecx,%r10d
.byte   143,232,120,194,239,2
        rorl    $2,%r14d
        addl    %r15d,%ecx
        vpxor   %xmm6,%xmm7,%xmm7
        movl    %r10d,%r13d
        addl    %ecx,%r14d
        rorl    $14,%r13d
        movl    %r14d,%ecx
        vpxor   %xmm5,%xmm7,%xmm7
        movl    %r11d,%r12d
        xorl    %r10d,%r13d
        rorl    $9,%r14d
        xorl    %eax,%r12d
        vpslldq $8,%xmm7,%xmm7
        rorl    $5,%r13d
        xorl    %ecx,%r14d
        andl    %r10d,%r12d
        vpand   %xmm13,%xmm11,%xmm11
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 224-128(%rdi),%xmm10
        xorl    %r10d,%r13d
        vpaddd  %xmm7,%xmm3,%xmm3
        addl    56(%rsp),%ebx
        movl    %ecx,%r15d
        rorl    $11,%r14d
        xorl    %eax,%r12d
        vpaddd  96(%rbp),%xmm3,%xmm6
        xorl    %edx,%r15d
        rorl    $6,%r13d
        addl    %r12d,%ebx
        andl    %r15d,%esi
        xorl    %ecx,%r14d
        addl    %r13d,%ebx
        xorl    %edx,%esi
        addl    %ebx,%r9d
        rorl    $2,%r14d
        addl    %esi,%ebx
        movl    %r9d,%r13d
        addl    %ebx,%r14d
        rorl    $14,%r13d
        movl    %r14d,%ebx
        movl    %r10d,%r12d
        xorl    %r9d,%r13d
        rorl    $9,%r14d
        xorl    %r11d,%r12d
        rorl    $5,%r13d
        xorl    %ebx,%r14d
        andl    %r9d,%r12d
        vpor    %xmm11,%xmm8,%xmm8
        vaesenclast     %xmm10,%xmm9,%xmm11
        vmovdqu 0-128(%rdi),%xmm10
        xorl    %r9d,%r13d
        addl    60(%rsp),%eax
        movl    %ebx,%esi
        rorl    $11,%r14d
        xorl    %r11d,%r12d
        xorl    %ecx,%esi
        rorl    $6,%r13d
        addl    %r12d,%eax
        andl    %esi,%r15d
        xorl    %ebx,%r14d
        addl    %r13d,%eax
        xorl    %ecx,%r15d
        addl    %eax,%r8d
        rorl    $2,%r14d
        addl    %r15d,%eax
        movl    %r8d,%r13d
        addl    %eax,%r14d
        vmovdqa %xmm6,48(%rsp)
        movq    64+0(%rsp),%r12
        vpand   %xmm14,%xmm11,%xmm11
        movq    64+8(%rsp),%r15
        vpor    %xmm11,%xmm8,%xmm8
        vmovdqu %xmm8,(%r15,%r12,1)
        leaq    16(%r12),%r12
        cmpb    $0,131(%rbp)
        jne     .Lxop_00_47
        vmovdqu (%r12),%xmm9
        movq    %r12,64+0(%rsp)
        rorl    $14,%r13d
        movl    %r14d,%eax
        movl    %r9d,%r12d
        xorl    %r8d,%r13d
        rorl    $9,%r14d
        xorl    %r10d,%r12d
        rorl    $5,%r13d
        xorl    %eax,%r14d
        andl    %r8d,%r12d
        vpxor   %xmm10,%xmm9,%xmm9
        vmovdqu 16-128(%rdi),%xmm10
        xorl    %r8d,%r13d
        addl    0(%rsp),%r11d
        movl    %eax,%r15d
        rorl    $11,%r14d
        xorl    %r10d,%r12d
        xorl    %ebx,%r15d
        rorl    $6,%r13d
        addl    %r12d,%r11d
        andl    %r15d,%esi
        xorl    %eax,%r14d
        addl    %r13d,%r11d
        xorl    %ebx,%esi
        addl    %r11d,%edx
        rorl    $2,%r14d
        addl    %esi,%r11d
        movl    %edx,%r13d
        addl    %r11d,%r14d
        rorl    $14,%r13d
        movl    %r14d,%r11d
        movl    %r8d,%r12d
        xorl    %edx,%r13d
        rorl    $9,%r14d
        xorl    %r9d,%r12d
        rorl    $5,%r13d
        xorl    %r11d,%r14d
        andl    %edx,%r12d
        vpxor   %xmm8,%xmm9,%xmm9
        xorl    %edx,%r13d
        addl    4(%rsp),%r10d
        movl    %r11d,%esi
        rorl    $11,%r14d
        xorl    %r9d,%r12d
        xorl    %eax,%esi
        rorl    $6,%r13d
        addl    %r12d,%r10d
        andl    %esi,%r15d
        xorl    %r11d,%r14d
        addl    %r13d,%r10d
        xorl    %eax,%r15d
        addl    %r10d,%ecx
        rorl    $2,%r14d
        addl    %r15d,%r10d
        movl    %ecx,%r13d
        addl    %r10d,%r14d
        rorl    $14,%r13d
        movl    %r14d,%r10d
        movl    %edx,%r12d
        xorl    %ecx,%r13d
        rorl    $9,%r14d
        xorl    %r8d,%r12d
        rorl    $5,%r13d
        xorl    %r10d,%r14d
        andl    %ecx,%r12d
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 32-128(%rdi),%xmm10
        xorl    %ecx,%r13d
        addl    8(%rsp),%r9d
        movl    %r10d,%r15d
        rorl    $11,%r14d
        xorl    %r8d,%r12d
        xorl    %r11d,%r15d
        rorl    $6,%r13d
        addl    %r12d,%r9d
        andl    %r15d,%esi
        xorl    %r10d,%r14d
        addl    %r13d,%r9d
        xorl    %r11d,%esi
        addl    %r9d,%ebx
        rorl    $2,%r14d
        addl    %esi,%r9d
        movl    %ebx,%r13d
        addl    %r9d,%r14d
        rorl    $14,%r13d
        movl    %r14d,%r9d
        movl    %ecx,%r12d
        xorl    %ebx,%r13d
        rorl    $9,%r14d
        xorl    %edx,%r12d
        rorl    $5,%r13d
        xorl    %r9d,%r14d
        andl    %ebx,%r12d
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 48-128(%rdi),%xmm10
        xorl    %ebx,%r13d
        addl    12(%rsp),%r8d
        movl    %r9d,%esi
        rorl    $11,%r14d
        xorl    %edx,%r12d
        xorl    %r10d,%esi
        rorl    $6,%r13d
        addl    %r12d,%r8d
        andl    %esi,%r15d
        xorl    %r9d,%r14d
        addl    %r13d,%r8d
        xorl    %r10d,%r15d
        addl    %r8d,%eax
        rorl    $2,%r14d
        addl    %r15d,%r8d
        movl    %eax,%r13d
        addl    %r8d,%r14d
        rorl    $14,%r13d
        movl    %r14d,%r8d
        movl    %ebx,%r12d
        xorl    %eax,%r13d
        rorl    $9,%r14d
        xorl    %ecx,%r12d
        rorl    $5,%r13d
        xorl    %r8d,%r14d
        andl    %eax,%r12d
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 64-128(%rdi),%xmm10
        xorl    %eax,%r13d
        addl    16(%rsp),%edx
        movl    %r8d,%r15d
        rorl    $11,%r14d
        xorl    %ecx,%r12d
        xorl    %r9d,%r15d
        rorl    $6,%r13d
        addl    %r12d,%edx
        andl    %r15d,%esi
        xorl    %r8d,%r14d
        addl    %r13d,%edx
        xorl    %r9d,%esi
        addl    %edx,%r11d
        rorl    $2,%r14d
        addl    %esi,%edx
        movl    %r11d,%r13d
        addl    %edx,%r14d
        rorl    $14,%r13d
        movl    %r14d,%edx
        movl    %eax,%r12d
        xorl    %r11d,%r13d
        rorl    $9,%r14d
        xorl    %ebx,%r12d
        rorl    $5,%r13d
        xorl    %edx,%r14d
        andl    %r11d,%r12d
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 80-128(%rdi),%xmm10
        xorl    %r11d,%r13d
        addl    20(%rsp),%ecx
        movl    %edx,%esi
        rorl    $11,%r14d
        xorl    %ebx,%r12d
        xorl    %r8d,%esi
        rorl    $6,%r13d
        addl    %r12d,%ecx
        andl    %esi,%r15d
        xorl    %edx,%r14d
        addl    %r13d,%ecx
        xorl    %r8d,%r15d
        addl    %ecx,%r10d
        rorl    $2,%r14d
        addl    %r15d,%ecx
        movl    %r10d,%r13d
        addl    %ecx,%r14d
        rorl    $14,%r13d
        movl    %r14d,%ecx
        movl    %r11d,%r12d
        xorl    %r10d,%r13d
        rorl    $9,%r14d
        xorl    %eax,%r12d
        rorl    $5,%r13d
        xorl    %ecx,%r14d
        andl    %r10d,%r12d
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 96-128(%rdi),%xmm10
        xorl    %r10d,%r13d
        addl    24(%rsp),%ebx
        movl    %ecx,%r15d
        rorl    $11,%r14d
        xorl    %eax,%r12d
        xorl    %edx,%r15d
        rorl    $6,%r13d
        addl    %r12d,%ebx
        andl    %r15d,%esi
        xorl    %ecx,%r14d
        addl    %r13d,%ebx
        xorl    %edx,%esi
        addl    %ebx,%r9d
        rorl    $2,%r14d
        addl    %esi,%ebx
        movl    %r9d,%r13d
        addl    %ebx,%r14d
        rorl    $14,%r13d
        movl    %r14d,%ebx
        movl    %r10d,%r12d
        xorl    %r9d,%r13d
        rorl    $9,%r14d
        xorl    %r11d,%r12d
        rorl    $5,%r13d
        xorl    %ebx,%r14d
        andl    %r9d,%r12d
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 112-128(%rdi),%xmm10
        xorl    %r9d,%r13d
        addl    28(%rsp),%eax
        movl    %ebx,%esi
        rorl    $11,%r14d
        xorl    %r11d,%r12d
        xorl    %ecx,%esi
        rorl    $6,%r13d
        addl    %r12d,%eax
        andl    %esi,%r15d
        xorl    %ebx,%r14d
        addl    %r13d,%eax
        xorl    %ecx,%r15d
        addl    %eax,%r8d
        rorl    $2,%r14d
        addl    %r15d,%eax
        movl    %r8d,%r13d
        addl    %eax,%r14d
        rorl    $14,%r13d
        movl    %r14d,%eax
        movl    %r9d,%r12d
        xorl    %r8d,%r13d
        rorl    $9,%r14d
        xorl    %r10d,%r12d
        rorl    $5,%r13d
        xorl    %eax,%r14d
        andl    %r8d,%r12d
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 128-128(%rdi),%xmm10
        xorl    %r8d,%r13d
        addl    32(%rsp),%r11d
        movl    %eax,%r15d
        rorl    $11,%r14d
        xorl    %r10d,%r12d
        xorl    %ebx,%r15d
        rorl    $6,%r13d
        addl    %r12d,%r11d
        andl    %r15d,%esi
        xorl    %eax,%r14d
        addl    %r13d,%r11d
        xorl    %ebx,%esi
        addl    %r11d,%edx
        rorl    $2,%r14d
        addl    %esi,%r11d
        movl    %edx,%r13d
        addl    %r11d,%r14d
        rorl    $14,%r13d
        movl    %r14d,%r11d
        movl    %r8d,%r12d
        xorl    %edx,%r13d
        rorl    $9,%r14d
        xorl    %r9d,%r12d
        rorl    $5,%r13d
        xorl    %r11d,%r14d
        andl    %edx,%r12d
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 144-128(%rdi),%xmm10
        xorl    %edx,%r13d
        addl    36(%rsp),%r10d
        movl    %r11d,%esi
        rorl    $11,%r14d
        xorl    %r9d,%r12d
        xorl    %eax,%esi
        rorl    $6,%r13d
        addl    %r12d,%r10d
        andl    %esi,%r15d
        xorl    %r11d,%r14d
        addl    %r13d,%r10d
        xorl    %eax,%r15d
        addl    %r10d,%ecx
        rorl    $2,%r14d
        addl    %r15d,%r10d
        movl    %ecx,%r13d
        addl    %r10d,%r14d
        rorl    $14,%r13d
        movl    %r14d,%r10d
        movl    %edx,%r12d
        xorl    %ecx,%r13d
        rorl    $9,%r14d
        xorl    %r8d,%r12d
        rorl    $5,%r13d
        xorl    %r10d,%r14d
        andl    %ecx,%r12d
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 160-128(%rdi),%xmm10
        xorl    %ecx,%r13d
        addl    40(%rsp),%r9d
        movl    %r10d,%r15d
        rorl    $11,%r14d
        xorl    %r8d,%r12d
        xorl    %r11d,%r15d
        rorl    $6,%r13d
        addl    %r12d,%r9d
        andl    %r15d,%esi
        xorl    %r10d,%r14d
        addl    %r13d,%r9d
        xorl    %r11d,%esi
        addl    %r9d,%ebx
        rorl    $2,%r14d
        addl    %esi,%r9d
        movl    %ebx,%r13d
        addl    %r9d,%r14d
        rorl    $14,%r13d
        movl    %r14d,%r9d
        movl    %ecx,%r12d
        xorl    %ebx,%r13d
        rorl    $9,%r14d
        xorl    %edx,%r12d
        rorl    $5,%r13d
        xorl    %r9d,%r14d
        andl    %ebx,%r12d
        vaesenclast     %xmm10,%xmm9,%xmm11
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 176-128(%rdi),%xmm10
        xorl    %ebx,%r13d
        addl    44(%rsp),%r8d
        movl    %r9d,%esi
        rorl    $11,%r14d
        xorl    %edx,%r12d
        xorl    %r10d,%esi
        rorl    $6,%r13d
        addl    %r12d,%r8d
        andl    %esi,%r15d
        xorl    %r9d,%r14d
        addl    %r13d,%r8d
        xorl    %r10d,%r15d
        addl    %r8d,%eax
        rorl    $2,%r14d
        addl    %r15d,%r8d
        movl    %eax,%r13d
        addl    %r8d,%r14d
        rorl    $14,%r13d
        movl    %r14d,%r8d
        movl    %ebx,%r12d
        xorl    %eax,%r13d
        rorl    $9,%r14d
        xorl    %ecx,%r12d
        rorl    $5,%r13d
        xorl    %r8d,%r14d
        andl    %eax,%r12d
        vpand   %xmm12,%xmm11,%xmm8
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 192-128(%rdi),%xmm10
        xorl    %eax,%r13d
        addl    48(%rsp),%edx
        movl    %r8d,%r15d
        rorl    $11,%r14d
        xorl    %ecx,%r12d
        xorl    %r9d,%r15d
        rorl    $6,%r13d
        addl    %r12d,%edx
        andl    %r15d,%esi
        xorl    %r8d,%r14d
        addl    %r13d,%edx
        xorl    %r9d,%esi
        addl    %edx,%r11d
        rorl    $2,%r14d
        addl    %esi,%edx
        movl    %r11d,%r13d
        addl    %edx,%r14d
        rorl    $14,%r13d
        movl    %r14d,%edx
        movl    %eax,%r12d
        xorl    %r11d,%r13d
        rorl    $9,%r14d
        xorl    %ebx,%r12d
        rorl    $5,%r13d
        xorl    %edx,%r14d
        andl    %r11d,%r12d
        vaesenclast     %xmm10,%xmm9,%xmm11
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 208-128(%rdi),%xmm10
        xorl    %r11d,%r13d
        addl    52(%rsp),%ecx
        movl    %edx,%esi
        rorl    $11,%r14d
        xorl    %ebx,%r12d
        xorl    %r8d,%esi
        rorl    $6,%r13d
        addl    %r12d,%ecx
        andl    %esi,%r15d
        xorl    %edx,%r14d
        addl    %r13d,%ecx
        xorl    %r8d,%r15d
        addl    %ecx,%r10d
        rorl    $2,%r14d
        addl    %r15d,%ecx
        movl    %r10d,%r13d
        addl    %ecx,%r14d
        rorl    $14,%r13d
        movl    %r14d,%ecx
        movl    %r11d,%r12d
        xorl    %r10d,%r13d
        rorl    $9,%r14d
        xorl    %eax,%r12d
        rorl    $5,%r13d
        xorl    %ecx,%r14d
        andl    %r10d,%r12d
        vpand   %xmm13,%xmm11,%xmm11
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 224-128(%rdi),%xmm10
        xorl    %r10d,%r13d
        addl    56(%rsp),%ebx
        movl    %ecx,%r15d
        rorl    $11,%r14d
        xorl    %eax,%r12d
        xorl    %edx,%r15d
        rorl    $6,%r13d
        addl    %r12d,%ebx
        andl    %r15d,%esi
        xorl    %ecx,%r14d
        addl    %r13d,%ebx
        xorl    %edx,%esi
        addl    %ebx,%r9d
        rorl    $2,%r14d
        addl    %esi,%ebx
        movl    %r9d,%r13d
        addl    %ebx,%r14d
        rorl    $14,%r13d
        movl    %r14d,%ebx
        movl    %r10d,%r12d
        xorl    %r9d,%r13d
        rorl    $9,%r14d
        xorl    %r11d,%r12d
        rorl    $5,%r13d
        xorl    %ebx,%r14d
        andl    %r9d,%r12d
        vpor    %xmm11,%xmm8,%xmm8
        vaesenclast     %xmm10,%xmm9,%xmm11
        vmovdqu 0-128(%rdi),%xmm10
        xorl    %r9d,%r13d
        addl    60(%rsp),%eax
        movl    %ebx,%esi
        rorl    $11,%r14d
        xorl    %r11d,%r12d
        xorl    %ecx,%esi
        rorl    $6,%r13d
        addl    %r12d,%eax
        andl    %esi,%r15d
        xorl    %ebx,%r14d
        addl    %r13d,%eax
        xorl    %ecx,%r15d
        addl    %eax,%r8d
        rorl    $2,%r14d
        addl    %r15d,%eax
        movl    %r8d,%r13d
        addl    %eax,%r14d
        movq    64+0(%rsp),%r12
        movq    64+8(%rsp),%r13
        movq    64+40(%rsp),%r15
        movq    64+48(%rsp),%rsi

        vpand   %xmm14,%xmm11,%xmm11
        movl    %r14d,%eax
        vpor    %xmm11,%xmm8,%xmm8
        vmovdqu %xmm8,(%r12,%r13,1)
        leaq    16(%r12),%r12

        addl    0(%r15),%eax
        addl    4(%r15),%ebx
        addl    8(%r15),%ecx
        addl    12(%r15),%edx
        addl    16(%r15),%r8d
        addl    20(%r15),%r9d
        addl    24(%r15),%r10d
        addl    28(%r15),%r11d

        cmpq    64+16(%rsp),%r12

        movl    %eax,0(%r15)
        movl    %ebx,4(%r15)
        movl    %ecx,8(%r15)
        movl    %edx,12(%r15)
        movl    %r8d,16(%r15)
        movl    %r9d,20(%r15)
        movl    %r10d,24(%r15)
        movl    %r11d,28(%r15)

        jb      .Lloop_xop

        movq    64+32(%rsp),%r8
        movq    120(%rsp),%rsi
.cfi_def_cfa    %rsi,8
        vmovdqu %xmm8,(%r8)
        vzeroall
        movq    -48(%rsi),%r15
.cfi_restore    %r15
        movq    -40(%rsi),%r14
.cfi_restore    %r14
        movq    -32(%rsi),%r13
.cfi_restore    %r13
        movq    -24(%rsi),%r12
.cfi_restore    %r12
        movq    -16(%rsi),%rbp
.cfi_restore    %rbp
        movq    -8(%rsi),%rbx
.cfi_restore    %rbx
        leaq    (%rsi),%rsp
.cfi_def_cfa_register   %rsp
.Lepilogue_xop:
        .byte   0xf3,0xc3
.cfi_endproc
.size   aesni_cbc_sha256_enc_xop,.-aesni_cbc_sha256_enc_xop
.type   aesni_cbc_sha256_enc_avx,@function
.align  64
aesni_cbc_sha256_enc_avx:
.cfi_startproc
.Lavx_shortcut:
        movq    8(%rsp),%r10
        movq    %rsp,%rax
.cfi_def_cfa_register   %rax
        pushq   %rbx
.cfi_offset     %rbx,-16
        pushq   %rbp
.cfi_offset     %rbp,-24
        pushq   %r12
.cfi_offset     %r12,-32
        pushq   %r13
.cfi_offset     %r13,-40
        pushq   %r14
.cfi_offset     %r14,-48
        pushq   %r15
.cfi_offset     %r15,-56
        subq    $128,%rsp
        andq    $-64,%rsp

        shlq    $6,%rdx
        subq    %rdi,%rsi
        subq    %rdi,%r10
        addq    %rdi,%rdx


        movq    %rsi,64+8(%rsp)
        movq    %rdx,64+16(%rsp)

        movq    %r8,64+32(%rsp)
        movq    %r9,64+40(%rsp)
        movq    %r10,64+48(%rsp)
        movq    %rax,120(%rsp)
.cfi_escape     0x0f,0x06,0x77,0xf8,0x00,0x06,0x23,0x08
.Lprologue_avx:
        vzeroall

        movq    %rdi,%r12
        leaq    128(%rcx),%rdi
        leaq    K256+544(%rip),%r13
        movl    240-128(%rdi),%r14d
        movq    %r9,%r15
        movq    %r10,%rsi
        vmovdqu (%r8),%xmm8
        subq    $9,%r14

        movl    0(%r15),%eax
        movl    4(%r15),%ebx
        movl    8(%r15),%ecx
        movl    12(%r15),%edx
        movl    16(%r15),%r8d
        movl    20(%r15),%r9d
        movl    24(%r15),%r10d
        movl    28(%r15),%r11d

        vmovdqa 0(%r13,%r14,8),%xmm14
        vmovdqa 16(%r13,%r14,8),%xmm13
        vmovdqa 32(%r13,%r14,8),%xmm12
        vmovdqu 0-128(%rdi),%xmm10
        jmp     .Lloop_avx
.align  16
.Lloop_avx:
        vmovdqa K256+512(%rip),%xmm7
        vmovdqu 0(%rsi,%r12,1),%xmm0
        vmovdqu 16(%rsi,%r12,1),%xmm1
        vmovdqu 32(%rsi,%r12,1),%xmm2
        vmovdqu 48(%rsi,%r12,1),%xmm3
        vpshufb %xmm7,%xmm0,%xmm0
        leaq    K256(%rip),%rbp
        vpshufb %xmm7,%xmm1,%xmm1
        vpshufb %xmm7,%xmm2,%xmm2
        vpaddd  0(%rbp),%xmm0,%xmm4
        vpshufb %xmm7,%xmm3,%xmm3
        vpaddd  32(%rbp),%xmm1,%xmm5
        vpaddd  64(%rbp),%xmm2,%xmm6
        vpaddd  96(%rbp),%xmm3,%xmm7
        vmovdqa %xmm4,0(%rsp)
        movl    %eax,%r14d
        vmovdqa %xmm5,16(%rsp)
        movl    %ebx,%esi
        vmovdqa %xmm6,32(%rsp)
        xorl    %ecx,%esi
        vmovdqa %xmm7,48(%rsp)
        movl    %r8d,%r13d
        jmp     .Lavx_00_47

.align  16
.Lavx_00_47:
        subq    $-32*4,%rbp
        vmovdqu (%r12),%xmm9
        movq    %r12,64+0(%rsp)
        vpalignr        $4,%xmm0,%xmm1,%xmm4
        shrdl   $14,%r13d,%r13d
        movl    %r14d,%eax
        movl    %r9d,%r12d
        vpalignr        $4,%xmm2,%xmm3,%xmm7
        xorl    %r8d,%r13d
        shrdl   $9,%r14d,%r14d
        xorl    %r10d,%r12d
        vpsrld  $7,%xmm4,%xmm6
        shrdl   $5,%r13d,%r13d
        xorl    %eax,%r14d
        andl    %r8d,%r12d
        vpaddd  %xmm7,%xmm0,%xmm0
        vpxor   %xmm10,%xmm9,%xmm9
        vmovdqu 16-128(%rdi),%xmm10
        xorl    %r8d,%r13d
        addl    0(%rsp),%r11d
        movl    %eax,%r15d
        vpsrld  $3,%xmm4,%xmm7
        shrdl   $11,%r14d,%r14d
        xorl    %r10d,%r12d
        xorl    %ebx,%r15d
        vpslld  $14,%xmm4,%xmm5
        shrdl   $6,%r13d,%r13d
        addl    %r12d,%r11d
        andl    %r15d,%esi
        vpxor   %xmm6,%xmm7,%xmm4
        xorl    %eax,%r14d
        addl    %r13d,%r11d
        xorl    %ebx,%esi
        vpshufd $250,%xmm3,%xmm7
        addl    %r11d,%edx
        shrdl   $2,%r14d,%r14d
        addl    %esi,%r11d
        vpsrld  $11,%xmm6,%xmm6
        movl    %edx,%r13d
        addl    %r11d,%r14d
        shrdl   $14,%r13d,%r13d
        vpxor   %xmm5,%xmm4,%xmm4
        movl    %r14d,%r11d
        movl    %r8d,%r12d
        xorl    %edx,%r13d
        vpslld  $11,%xmm5,%xmm5
        shrdl   $9,%r14d,%r14d
        xorl    %r9d,%r12d
        shrdl   $5,%r13d,%r13d
        vpxor   %xmm6,%xmm4,%xmm4
        xorl    %r11d,%r14d
        andl    %edx,%r12d
        vpxor   %xmm8,%xmm9,%xmm9
        xorl    %edx,%r13d
        vpsrld  $10,%xmm7,%xmm6
        addl    4(%rsp),%r10d
        movl    %r11d,%esi
        shrdl   $11,%r14d,%r14d
        vpxor   %xmm5,%xmm4,%xmm4
        xorl    %r9d,%r12d
        xorl    %eax,%esi
        shrdl   $6,%r13d,%r13d
        vpsrlq  $17,%xmm7,%xmm7
        addl    %r12d,%r10d
        andl    %esi,%r15d
        xorl    %r11d,%r14d
        vpaddd  %xmm4,%xmm0,%xmm0
        addl    %r13d,%r10d
        xorl    %eax,%r15d
        addl    %r10d,%ecx
        vpxor   %xmm7,%xmm6,%xmm6
        shrdl   $2,%r14d,%r14d
        addl    %r15d,%r10d
        movl    %ecx,%r13d
        vpsrlq  $2,%xmm7,%xmm7
        addl    %r10d,%r14d
        shrdl   $14,%r13d,%r13d
        movl    %r14d,%r10d
        vpxor   %xmm7,%xmm6,%xmm6
        movl    %edx,%r12d
        xorl    %ecx,%r13d
        shrdl   $9,%r14d,%r14d
        vpshufd $132,%xmm6,%xmm6
        xorl    %r8d,%r12d
        shrdl   $5,%r13d,%r13d
        xorl    %r10d,%r14d
        vpsrldq $8,%xmm6,%xmm6
        andl    %ecx,%r12d
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 32-128(%rdi),%xmm10
        xorl    %ecx,%r13d
        addl    8(%rsp),%r9d
        vpaddd  %xmm6,%xmm0,%xmm0
        movl    %r10d,%r15d
        shrdl   $11,%r14d,%r14d
        xorl    %r8d,%r12d
        vpshufd $80,%xmm0,%xmm7
        xorl    %r11d,%r15d
        shrdl   $6,%r13d,%r13d
        addl    %r12d,%r9d
        vpsrld  $10,%xmm7,%xmm6
        andl    %r15d,%esi
        xorl    %r10d,%r14d
        addl    %r13d,%r9d
        vpsrlq  $17,%xmm7,%xmm7
        xorl    %r11d,%esi
        addl    %r9d,%ebx
        shrdl   $2,%r14d,%r14d
        vpxor   %xmm7,%xmm6,%xmm6
        addl    %esi,%r9d
        movl    %ebx,%r13d
        addl    %r9d,%r14d
        vpsrlq  $2,%xmm7,%xmm7
        shrdl   $14,%r13d,%r13d
        movl    %r14d,%r9d
        movl    %ecx,%r12d
        vpxor   %xmm7,%xmm6,%xmm6
        xorl    %ebx,%r13d
        shrdl   $9,%r14d,%r14d
        xorl    %edx,%r12d
        vpshufd $232,%xmm6,%xmm6
        shrdl   $5,%r13d,%r13d
        xorl    %r9d,%r14d
        andl    %ebx,%r12d
        vpslldq $8,%xmm6,%xmm6
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 48-128(%rdi),%xmm10
        xorl    %ebx,%r13d
        addl    12(%rsp),%r8d
        movl    %r9d,%esi
        vpaddd  %xmm6,%xmm0,%xmm0
        shrdl   $11,%r14d,%r14d
        xorl    %edx,%r12d
        xorl    %r10d,%esi
        vpaddd  0(%rbp),%xmm0,%xmm6
        shrdl   $6,%r13d,%r13d
        addl    %r12d,%r8d
        andl    %esi,%r15d
        xorl    %r9d,%r14d
        addl    %r13d,%r8d
        xorl    %r10d,%r15d
        addl    %r8d,%eax
        shrdl   $2,%r14d,%r14d
        addl    %r15d,%r8d
        movl    %eax,%r13d
        addl    %r8d,%r14d
        vmovdqa %xmm6,0(%rsp)
        vpalignr        $4,%xmm1,%xmm2,%xmm4
        shrdl   $14,%r13d,%r13d
        movl    %r14d,%r8d
        movl    %ebx,%r12d
        vpalignr        $4,%xmm3,%xmm0,%xmm7
        xorl    %eax,%r13d
        shrdl   $9,%r14d,%r14d
        xorl    %ecx,%r12d
        vpsrld  $7,%xmm4,%xmm6
        shrdl   $5,%r13d,%r13d
        xorl    %r8d,%r14d
        andl    %eax,%r12d
        vpaddd  %xmm7,%xmm1,%xmm1
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 64-128(%rdi),%xmm10
        xorl    %eax,%r13d
        addl    16(%rsp),%edx
        movl    %r8d,%r15d
        vpsrld  $3,%xmm4,%xmm7
        shrdl   $11,%r14d,%r14d
        xorl    %ecx,%r12d
        xorl    %r9d,%r15d
        vpslld  $14,%xmm4,%xmm5
        shrdl   $6,%r13d,%r13d
        addl    %r12d,%edx
        andl    %r15d,%esi
        vpxor   %xmm6,%xmm7,%xmm4
        xorl    %r8d,%r14d
        addl    %r13d,%edx
        xorl    %r9d,%esi
        vpshufd $250,%xmm0,%xmm7
        addl    %edx,%r11d
        shrdl   $2,%r14d,%r14d
        addl    %esi,%edx
        vpsrld  $11,%xmm6,%xmm6
        movl    %r11d,%r13d
        addl    %edx,%r14d
        shrdl   $14,%r13d,%r13d
        vpxor   %xmm5,%xmm4,%xmm4
        movl    %r14d,%edx
        movl    %eax,%r12d
        xorl    %r11d,%r13d
        vpslld  $11,%xmm5,%xmm5
        shrdl   $9,%r14d,%r14d
        xorl    %ebx,%r12d
        shrdl   $5,%r13d,%r13d
        vpxor   %xmm6,%xmm4,%xmm4
        xorl    %edx,%r14d
        andl    %r11d,%r12d
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 80-128(%rdi),%xmm10
        xorl    %r11d,%r13d
        vpsrld  $10,%xmm7,%xmm6
        addl    20(%rsp),%ecx
        movl    %edx,%esi
        shrdl   $11,%r14d,%r14d
        vpxor   %xmm5,%xmm4,%xmm4
        xorl    %ebx,%r12d
        xorl    %r8d,%esi
        shrdl   $6,%r13d,%r13d
        vpsrlq  $17,%xmm7,%xmm7
        addl    %r12d,%ecx
        andl    %esi,%r15d
        xorl    %edx,%r14d
        vpaddd  %xmm4,%xmm1,%xmm1
        addl    %r13d,%ecx
        xorl    %r8d,%r15d
        addl    %ecx,%r10d
        vpxor   %xmm7,%xmm6,%xmm6
        shrdl   $2,%r14d,%r14d
        addl    %r15d,%ecx
        movl    %r10d,%r13d
        vpsrlq  $2,%xmm7,%xmm7
        addl    %ecx,%r14d
        shrdl   $14,%r13d,%r13d
        movl    %r14d,%ecx
        vpxor   %xmm7,%xmm6,%xmm6
        movl    %r11d,%r12d
        xorl    %r10d,%r13d
        shrdl   $9,%r14d,%r14d
        vpshufd $132,%xmm6,%xmm6
        xorl    %eax,%r12d
        shrdl   $5,%r13d,%r13d
        xorl    %ecx,%r14d
        vpsrldq $8,%xmm6,%xmm6
        andl    %r10d,%r12d
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 96-128(%rdi),%xmm10
        xorl    %r10d,%r13d
        addl    24(%rsp),%ebx
        vpaddd  %xmm6,%xmm1,%xmm1
        movl    %ecx,%r15d
        shrdl   $11,%r14d,%r14d
        xorl    %eax,%r12d
        vpshufd $80,%xmm1,%xmm7
        xorl    %edx,%r15d
        shrdl   $6,%r13d,%r13d
        addl    %r12d,%ebx
        vpsrld  $10,%xmm7,%xmm6
        andl    %r15d,%esi
        xorl    %ecx,%r14d
        addl    %r13d,%ebx
        vpsrlq  $17,%xmm7,%xmm7
        xorl    %edx,%esi
        addl    %ebx,%r9d
        shrdl   $2,%r14d,%r14d
        vpxor   %xmm7,%xmm6,%xmm6
        addl    %esi,%ebx
        movl    %r9d,%r13d
        addl    %ebx,%r14d
        vpsrlq  $2,%xmm7,%xmm7
        shrdl   $14,%r13d,%r13d
        movl    %r14d,%ebx
        movl    %r10d,%r12d
        vpxor   %xmm7,%xmm6,%xmm6
        xorl    %r9d,%r13d
        shrdl   $9,%r14d,%r14d
        xorl    %r11d,%r12d
        vpshufd $232,%xmm6,%xmm6
        shrdl   $5,%r13d,%r13d
        xorl    %ebx,%r14d
        andl    %r9d,%r12d
        vpslldq $8,%xmm6,%xmm6
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 112-128(%rdi),%xmm10
        xorl    %r9d,%r13d
        addl    28(%rsp),%eax
        movl    %ebx,%esi
        vpaddd  %xmm6,%xmm1,%xmm1
        shrdl   $11,%r14d,%r14d
        xorl    %r11d,%r12d
        xorl    %ecx,%esi
        vpaddd  32(%rbp),%xmm1,%xmm6
        shrdl   $6,%r13d,%r13d
        addl    %r12d,%eax
        andl    %esi,%r15d
        xorl    %ebx,%r14d
        addl    %r13d,%eax
        xorl    %ecx,%r15d
        addl    %eax,%r8d
        shrdl   $2,%r14d,%r14d
        addl    %r15d,%eax
        movl    %r8d,%r13d
        addl    %eax,%r14d
        vmovdqa %xmm6,16(%rsp)
        vpalignr        $4,%xmm2,%xmm3,%xmm4
        shrdl   $14,%r13d,%r13d
        movl    %r14d,%eax
        movl    %r9d,%r12d
        vpalignr        $4,%xmm0,%xmm1,%xmm7
        xorl    %r8d,%r13d
        shrdl   $9,%r14d,%r14d
        xorl    %r10d,%r12d
        vpsrld  $7,%xmm4,%xmm6
        shrdl   $5,%r13d,%r13d
        xorl    %eax,%r14d
        andl    %r8d,%r12d
        vpaddd  %xmm7,%xmm2,%xmm2
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 128-128(%rdi),%xmm10
        xorl    %r8d,%r13d
        addl    32(%rsp),%r11d
        movl    %eax,%r15d
        vpsrld  $3,%xmm4,%xmm7
        shrdl   $11,%r14d,%r14d
        xorl    %r10d,%r12d
        xorl    %ebx,%r15d
        vpslld  $14,%xmm4,%xmm5
        shrdl   $6,%r13d,%r13d
        addl    %r12d,%r11d
        andl    %r15d,%esi
        vpxor   %xmm6,%xmm7,%xmm4
        xorl    %eax,%r14d
        addl    %r13d,%r11d
        xorl    %ebx,%esi
        vpshufd $250,%xmm1,%xmm7
        addl    %r11d,%edx
        shrdl   $2,%r14d,%r14d
        addl    %esi,%r11d
        vpsrld  $11,%xmm6,%xmm6
        movl    %edx,%r13d
        addl    %r11d,%r14d
        shrdl   $14,%r13d,%r13d
        vpxor   %xmm5,%xmm4,%xmm4
        movl    %r14d,%r11d
        movl    %r8d,%r12d
        xorl    %edx,%r13d
        vpslld  $11,%xmm5,%xmm5
        shrdl   $9,%r14d,%r14d
        xorl    %r9d,%r12d
        shrdl   $5,%r13d,%r13d
        vpxor   %xmm6,%xmm4,%xmm4
        xorl    %r11d,%r14d
        andl    %edx,%r12d
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 144-128(%rdi),%xmm10
        xorl    %edx,%r13d
        vpsrld  $10,%xmm7,%xmm6
        addl    36(%rsp),%r10d
        movl    %r11d,%esi
        shrdl   $11,%r14d,%r14d
        vpxor   %xmm5,%xmm4,%xmm4
        xorl    %r9d,%r12d
        xorl    %eax,%esi
        shrdl   $6,%r13d,%r13d
        vpsrlq  $17,%xmm7,%xmm7
        addl    %r12d,%r10d
        andl    %esi,%r15d
        xorl    %r11d,%r14d
        vpaddd  %xmm4,%xmm2,%xmm2
        addl    %r13d,%r10d
        xorl    %eax,%r15d
        addl    %r10d,%ecx
        vpxor   %xmm7,%xmm6,%xmm6
        shrdl   $2,%r14d,%r14d
        addl    %r15d,%r10d
        movl    %ecx,%r13d
        vpsrlq  $2,%xmm7,%xmm7
        addl    %r10d,%r14d
        shrdl   $14,%r13d,%r13d
        movl    %r14d,%r10d
        vpxor   %xmm7,%xmm6,%xmm6
        movl    %edx,%r12d
        xorl    %ecx,%r13d
        shrdl   $9,%r14d,%r14d
        vpshufd $132,%xmm6,%xmm6
        xorl    %r8d,%r12d
        shrdl   $5,%r13d,%r13d
        xorl    %r10d,%r14d
        vpsrldq $8,%xmm6,%xmm6
        andl    %ecx,%r12d
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 160-128(%rdi),%xmm10
        xorl    %ecx,%r13d
        addl    40(%rsp),%r9d
        vpaddd  %xmm6,%xmm2,%xmm2
        movl    %r10d,%r15d
        shrdl   $11,%r14d,%r14d
        xorl    %r8d,%r12d
        vpshufd $80,%xmm2,%xmm7
        xorl    %r11d,%r15d
        shrdl   $6,%r13d,%r13d
        addl    %r12d,%r9d
        vpsrld  $10,%xmm7,%xmm6
        andl    %r15d,%esi
        xorl    %r10d,%r14d
        addl    %r13d,%r9d
        vpsrlq  $17,%xmm7,%xmm7
        xorl    %r11d,%esi
        addl    %r9d,%ebx
        shrdl   $2,%r14d,%r14d
        vpxor   %xmm7,%xmm6,%xmm6
        addl    %esi,%r9d
        movl    %ebx,%r13d
        addl    %r9d,%r14d
        vpsrlq  $2,%xmm7,%xmm7
        shrdl   $14,%r13d,%r13d
        movl    %r14d,%r9d
        movl    %ecx,%r12d
        vpxor   %xmm7,%xmm6,%xmm6
        xorl    %ebx,%r13d
        shrdl   $9,%r14d,%r14d
        xorl    %edx,%r12d
        vpshufd $232,%xmm6,%xmm6
        shrdl   $5,%r13d,%r13d
        xorl    %r9d,%r14d
        andl    %ebx,%r12d
        vpslldq $8,%xmm6,%xmm6
        vaesenclast     %xmm10,%xmm9,%xmm11
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 176-128(%rdi),%xmm10
        xorl    %ebx,%r13d
        addl    44(%rsp),%r8d
        movl    %r9d,%esi
        vpaddd  %xmm6,%xmm2,%xmm2
        shrdl   $11,%r14d,%r14d
        xorl    %edx,%r12d
        xorl    %r10d,%esi
        vpaddd  64(%rbp),%xmm2,%xmm6
        shrdl   $6,%r13d,%r13d
        addl    %r12d,%r8d
        andl    %esi,%r15d
        xorl    %r9d,%r14d
        addl    %r13d,%r8d
        xorl    %r10d,%r15d
        addl    %r8d,%eax
        shrdl   $2,%r14d,%r14d
        addl    %r15d,%r8d
        movl    %eax,%r13d
        addl    %r8d,%r14d
        vmovdqa %xmm6,32(%rsp)
        vpalignr        $4,%xmm3,%xmm0,%xmm4
        shrdl   $14,%r13d,%r13d
        movl    %r14d,%r8d
        movl    %ebx,%r12d
        vpalignr        $4,%xmm1,%xmm2,%xmm7
        xorl    %eax,%r13d
        shrdl   $9,%r14d,%r14d
        xorl    %ecx,%r12d
        vpsrld  $7,%xmm4,%xmm6
        shrdl   $5,%r13d,%r13d
        xorl    %r8d,%r14d
        andl    %eax,%r12d
        vpaddd  %xmm7,%xmm3,%xmm3
        vpand   %xmm12,%xmm11,%xmm8
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 192-128(%rdi),%xmm10
        xorl    %eax,%r13d
        addl    48(%rsp),%edx
        movl    %r8d,%r15d
        vpsrld  $3,%xmm4,%xmm7
        shrdl   $11,%r14d,%r14d
        xorl    %ecx,%r12d
        xorl    %r9d,%r15d
        vpslld  $14,%xmm4,%xmm5
        shrdl   $6,%r13d,%r13d
        addl    %r12d,%edx
        andl    %r15d,%esi
        vpxor   %xmm6,%xmm7,%xmm4
        xorl    %r8d,%r14d
        addl    %r13d,%edx
        xorl    %r9d,%esi
        vpshufd $250,%xmm2,%xmm7
        addl    %edx,%r11d
        shrdl   $2,%r14d,%r14d
        addl    %esi,%edx
        vpsrld  $11,%xmm6,%xmm6
        movl    %r11d,%r13d
        addl    %edx,%r14d
        shrdl   $14,%r13d,%r13d
        vpxor   %xmm5,%xmm4,%xmm4
        movl    %r14d,%edx
        movl    %eax,%r12d
        xorl    %r11d,%r13d
        vpslld  $11,%xmm5,%xmm5
        shrdl   $9,%r14d,%r14d
        xorl    %ebx,%r12d
        shrdl   $5,%r13d,%r13d
        vpxor   %xmm6,%xmm4,%xmm4
        xorl    %edx,%r14d
        andl    %r11d,%r12d
        vaesenclast     %xmm10,%xmm9,%xmm11
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 208-128(%rdi),%xmm10
        xorl    %r11d,%r13d
        vpsrld  $10,%xmm7,%xmm6
        addl    52(%rsp),%ecx
        movl    %edx,%esi
        shrdl   $11,%r14d,%r14d
        vpxor   %xmm5,%xmm4,%xmm4
        xorl    %ebx,%r12d
        xorl    %r8d,%esi
        shrdl   $6,%r13d,%r13d
        vpsrlq  $17,%xmm7,%xmm7
        addl    %r12d,%ecx
        andl    %esi,%r15d
        xorl    %edx,%r14d
        vpaddd  %xmm4,%xmm3,%xmm3
        addl    %r13d,%ecx
        xorl    %r8d,%r15d
        addl    %ecx,%r10d
        vpxor   %xmm7,%xmm6,%xmm6
        shrdl   $2,%r14d,%r14d
        addl    %r15d,%ecx
        movl    %r10d,%r13d
        vpsrlq  $2,%xmm7,%xmm7
        addl    %ecx,%r14d
        shrdl   $14,%r13d,%r13d
        movl    %r14d,%ecx
        vpxor   %xmm7,%xmm6,%xmm6
        movl    %r11d,%r12d
        xorl    %r10d,%r13d
        shrdl   $9,%r14d,%r14d
        vpshufd $132,%xmm6,%xmm6
        xorl    %eax,%r12d
        shrdl   $5,%r13d,%r13d
        xorl    %ecx,%r14d
        vpsrldq $8,%xmm6,%xmm6
        andl    %r10d,%r12d
        vpand   %xmm13,%xmm11,%xmm11
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 224-128(%rdi),%xmm10
        xorl    %r10d,%r13d
        addl    56(%rsp),%ebx
        vpaddd  %xmm6,%xmm3,%xmm3
        movl    %ecx,%r15d
        shrdl   $11,%r14d,%r14d
        xorl    %eax,%r12d
        vpshufd $80,%xmm3,%xmm7
        xorl    %edx,%r15d
        shrdl   $6,%r13d,%r13d
        addl    %r12d,%ebx
        vpsrld  $10,%xmm7,%xmm6
        andl    %r15d,%esi
        xorl    %ecx,%r14d
        addl    %r13d,%ebx
        vpsrlq  $17,%xmm7,%xmm7
        xorl    %edx,%esi
        addl    %ebx,%r9d
        shrdl   $2,%r14d,%r14d
        vpxor   %xmm7,%xmm6,%xmm6
        addl    %esi,%ebx
        movl    %r9d,%r13d
        addl    %ebx,%r14d
        vpsrlq  $2,%xmm7,%xmm7
        shrdl   $14,%r13d,%r13d
        movl    %r14d,%ebx
        movl    %r10d,%r12d
        vpxor   %xmm7,%xmm6,%xmm6
        xorl    %r9d,%r13d
        shrdl   $9,%r14d,%r14d
        xorl    %r11d,%r12d
        vpshufd $232,%xmm6,%xmm6
        shrdl   $5,%r13d,%r13d
        xorl    %ebx,%r14d
        andl    %r9d,%r12d
        vpslldq $8,%xmm6,%xmm6
        vpor    %xmm11,%xmm8,%xmm8
        vaesenclast     %xmm10,%xmm9,%xmm11
        vmovdqu 0-128(%rdi),%xmm10
        xorl    %r9d,%r13d
        addl    60(%rsp),%eax
        movl    %ebx,%esi
        vpaddd  %xmm6,%xmm3,%xmm3
        shrdl   $11,%r14d,%r14d
        xorl    %r11d,%r12d
        xorl    %ecx,%esi
        vpaddd  96(%rbp),%xmm3,%xmm6
        shrdl   $6,%r13d,%r13d
        addl    %r12d,%eax
        andl    %esi,%r15d
        xorl    %ebx,%r14d
        addl    %r13d,%eax
        xorl    %ecx,%r15d
        addl    %eax,%r8d
        shrdl   $2,%r14d,%r14d
        addl    %r15d,%eax
        movl    %r8d,%r13d
        addl    %eax,%r14d
        vmovdqa %xmm6,48(%rsp)
        movq    64+0(%rsp),%r12
        vpand   %xmm14,%xmm11,%xmm11
        movq    64+8(%rsp),%r15
        vpor    %xmm11,%xmm8,%xmm8
        vmovdqu %xmm8,(%r15,%r12,1)
        leaq    16(%r12),%r12
        cmpb    $0,131(%rbp)
        jne     .Lavx_00_47
        vmovdqu (%r12),%xmm9
        movq    %r12,64+0(%rsp)
        shrdl   $14,%r13d,%r13d
        movl    %r14d,%eax
        movl    %r9d,%r12d
        xorl    %r8d,%r13d
        shrdl   $9,%r14d,%r14d
        xorl    %r10d,%r12d
        shrdl   $5,%r13d,%r13d
        xorl    %eax,%r14d
        andl    %r8d,%r12d
        vpxor   %xmm10,%xmm9,%xmm9
        vmovdqu 16-128(%rdi),%xmm10
        xorl    %r8d,%r13d
        addl    0(%rsp),%r11d
        movl    %eax,%r15d
        shrdl   $11,%r14d,%r14d
        xorl    %r10d,%r12d
        xorl    %ebx,%r15d
        shrdl   $6,%r13d,%r13d
        addl    %r12d,%r11d
        andl    %r15d,%esi
        xorl    %eax,%r14d
        addl    %r13d,%r11d
        xorl    %ebx,%esi
        addl    %r11d,%edx
        shrdl   $2,%r14d,%r14d
        addl    %esi,%r11d
        movl    %edx,%r13d
        addl    %r11d,%r14d
        shrdl   $14,%r13d,%r13d
        movl    %r14d,%r11d
        movl    %r8d,%r12d
        xorl    %edx,%r13d
        shrdl   $9,%r14d,%r14d
        xorl    %r9d,%r12d
        shrdl   $5,%r13d,%r13d
        xorl    %r11d,%r14d
        andl    %edx,%r12d
        vpxor   %xmm8,%xmm9,%xmm9
        xorl    %edx,%r13d
        addl    4(%rsp),%r10d
        movl    %r11d,%esi
        shrdl   $11,%r14d,%r14d
        xorl    %r9d,%r12d
        xorl    %eax,%esi
        shrdl   $6,%r13d,%r13d
        addl    %r12d,%r10d
        andl    %esi,%r15d
        xorl    %r11d,%r14d
        addl    %r13d,%r10d
        xorl    %eax,%r15d
        addl    %r10d,%ecx
        shrdl   $2,%r14d,%r14d
        addl    %r15d,%r10d
        movl    %ecx,%r13d
        addl    %r10d,%r14d
        shrdl   $14,%r13d,%r13d
        movl    %r14d,%r10d
        movl    %edx,%r12d
        xorl    %ecx,%r13d
        shrdl   $9,%r14d,%r14d
        xorl    %r8d,%r12d
        shrdl   $5,%r13d,%r13d
        xorl    %r10d,%r14d
        andl    %ecx,%r12d
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 32-128(%rdi),%xmm10
        xorl    %ecx,%r13d
        addl    8(%rsp),%r9d
        movl    %r10d,%r15d
        shrdl   $11,%r14d,%r14d
        xorl    %r8d,%r12d
        xorl    %r11d,%r15d
        shrdl   $6,%r13d,%r13d
        addl    %r12d,%r9d
        andl    %r15d,%esi
        xorl    %r10d,%r14d
        addl    %r13d,%r9d
        xorl    %r11d,%esi
        addl    %r9d,%ebx
        shrdl   $2,%r14d,%r14d
        addl    %esi,%r9d
        movl    %ebx,%r13d
        addl    %r9d,%r14d
        shrdl   $14,%r13d,%r13d
        movl    %r14d,%r9d
        movl    %ecx,%r12d
        xorl    %ebx,%r13d
        shrdl   $9,%r14d,%r14d
        xorl    %edx,%r12d
        shrdl   $5,%r13d,%r13d
        xorl    %r9d,%r14d
        andl    %ebx,%r12d
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 48-128(%rdi),%xmm10
        xorl    %ebx,%r13d
        addl    12(%rsp),%r8d
        movl    %r9d,%esi
        shrdl   $11,%r14d,%r14d
        xorl    %edx,%r12d
        xorl    %r10d,%esi
        shrdl   $6,%r13d,%r13d
        addl    %r12d,%r8d
        andl    %esi,%r15d
        xorl    %r9d,%r14d
        addl    %r13d,%r8d
        xorl    %r10d,%r15d
        addl    %r8d,%eax
        shrdl   $2,%r14d,%r14d
        addl    %r15d,%r8d
        movl    %eax,%r13d
        addl    %r8d,%r14d
        shrdl   $14,%r13d,%r13d
        movl    %r14d,%r8d
        movl    %ebx,%r12d
        xorl    %eax,%r13d
        shrdl   $9,%r14d,%r14d
        xorl    %ecx,%r12d
        shrdl   $5,%r13d,%r13d
        xorl    %r8d,%r14d
        andl    %eax,%r12d
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 64-128(%rdi),%xmm10
        xorl    %eax,%r13d
        addl    16(%rsp),%edx
        movl    %r8d,%r15d
        shrdl   $11,%r14d,%r14d
        xorl    %ecx,%r12d
        xorl    %r9d,%r15d
        shrdl   $6,%r13d,%r13d
        addl    %r12d,%edx
        andl    %r15d,%esi
        xorl    %r8d,%r14d
        addl    %r13d,%edx
        xorl    %r9d,%esi
        addl    %edx,%r11d
        shrdl   $2,%r14d,%r14d
        addl    %esi,%edx
        movl    %r11d,%r13d
        addl    %edx,%r14d
        shrdl   $14,%r13d,%r13d
        movl    %r14d,%edx
        movl    %eax,%r12d
        xorl    %r11d,%r13d
        shrdl   $9,%r14d,%r14d
        xorl    %ebx,%r12d
        shrdl   $5,%r13d,%r13d
        xorl    %edx,%r14d
        andl    %r11d,%r12d
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 80-128(%rdi),%xmm10
        xorl    %r11d,%r13d
        addl    20(%rsp),%ecx
        movl    %edx,%esi
        shrdl   $11,%r14d,%r14d
        xorl    %ebx,%r12d
        xorl    %r8d,%esi
        shrdl   $6,%r13d,%r13d
        addl    %r12d,%ecx
        andl    %esi,%r15d
        xorl    %edx,%r14d
        addl    %r13d,%ecx
        xorl    %r8d,%r15d
        addl    %ecx,%r10d
        shrdl   $2,%r14d,%r14d
        addl    %r15d,%ecx
        movl    %r10d,%r13d
        addl    %ecx,%r14d
        shrdl   $14,%r13d,%r13d
        movl    %r14d,%ecx
        movl    %r11d,%r12d
        xorl    %r10d,%r13d
        shrdl   $9,%r14d,%r14d
        xorl    %eax,%r12d
        shrdl   $5,%r13d,%r13d
        xorl    %ecx,%r14d
        andl    %r10d,%r12d
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 96-128(%rdi),%xmm10
        xorl    %r10d,%r13d
        addl    24(%rsp),%ebx
        movl    %ecx,%r15d
        shrdl   $11,%r14d,%r14d
        xorl    %eax,%r12d
        xorl    %edx,%r15d
        shrdl   $6,%r13d,%r13d
        addl    %r12d,%ebx
        andl    %r15d,%esi
        xorl    %ecx,%r14d
        addl    %r13d,%ebx
        xorl    %edx,%esi
        addl    %ebx,%r9d
        shrdl   $2,%r14d,%r14d
        addl    %esi,%ebx
        movl    %r9d,%r13d
        addl    %ebx,%r14d
        shrdl   $14,%r13d,%r13d
        movl    %r14d,%ebx
        movl    %r10d,%r12d
        xorl    %r9d,%r13d
        shrdl   $9,%r14d,%r14d
        xorl    %r11d,%r12d
        shrdl   $5,%r13d,%r13d
        xorl    %ebx,%r14d
        andl    %r9d,%r12d
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 112-128(%rdi),%xmm10
        xorl    %r9d,%r13d
        addl    28(%rsp),%eax
        movl    %ebx,%esi
        shrdl   $11,%r14d,%r14d
        xorl    %r11d,%r12d
        xorl    %ecx,%esi
        shrdl   $6,%r13d,%r13d
        addl    %r12d,%eax
        andl    %esi,%r15d
        xorl    %ebx,%r14d
        addl    %r13d,%eax
        xorl    %ecx,%r15d
        addl    %eax,%r8d
        shrdl   $2,%r14d,%r14d
        addl    %r15d,%eax
        movl    %r8d,%r13d
        addl    %eax,%r14d
        shrdl   $14,%r13d,%r13d
        movl    %r14d,%eax
        movl    %r9d,%r12d
        xorl    %r8d,%r13d
        shrdl   $9,%r14d,%r14d
        xorl    %r10d,%r12d
        shrdl   $5,%r13d,%r13d
        xorl    %eax,%r14d
        andl    %r8d,%r12d
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 128-128(%rdi),%xmm10
        xorl    %r8d,%r13d
        addl    32(%rsp),%r11d
        movl    %eax,%r15d
        shrdl   $11,%r14d,%r14d
        xorl    %r10d,%r12d
        xorl    %ebx,%r15d
        shrdl   $6,%r13d,%r13d
        addl    %r12d,%r11d
        andl    %r15d,%esi
        xorl    %eax,%r14d
        addl    %r13d,%r11d
        xorl    %ebx,%esi
        addl    %r11d,%edx
        shrdl   $2,%r14d,%r14d
        addl    %esi,%r11d
        movl    %edx,%r13d
        addl    %r11d,%r14d
        shrdl   $14,%r13d,%r13d
        movl    %r14d,%r11d
        movl    %r8d,%r12d
        xorl    %edx,%r13d
        shrdl   $9,%r14d,%r14d
        xorl    %r9d,%r12d
        shrdl   $5,%r13d,%r13d
        xorl    %r11d,%r14d
        andl    %edx,%r12d
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 144-128(%rdi),%xmm10
        xorl    %edx,%r13d
        addl    36(%rsp),%r10d
        movl    %r11d,%esi
        shrdl   $11,%r14d,%r14d
        xorl    %r9d,%r12d
        xorl    %eax,%esi
        shrdl   $6,%r13d,%r13d
        addl    %r12d,%r10d
        andl    %esi,%r15d
        xorl    %r11d,%r14d
        addl    %r13d,%r10d
        xorl    %eax,%r15d
        addl    %r10d,%ecx
        shrdl   $2,%r14d,%r14d
        addl    %r15d,%r10d
        movl    %ecx,%r13d
        addl    %r10d,%r14d
        shrdl   $14,%r13d,%r13d
        movl    %r14d,%r10d
        movl    %edx,%r12d
        xorl    %ecx,%r13d
        shrdl   $9,%r14d,%r14d
        xorl    %r8d,%r12d
        shrdl   $5,%r13d,%r13d
        xorl    %r10d,%r14d
        andl    %ecx,%r12d
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 160-128(%rdi),%xmm10
        xorl    %ecx,%r13d
        addl    40(%rsp),%r9d
        movl    %r10d,%r15d
        shrdl   $11,%r14d,%r14d
        xorl    %r8d,%r12d
        xorl    %r11d,%r15d
        shrdl   $6,%r13d,%r13d
        addl    %r12d,%r9d
        andl    %r15d,%esi
        xorl    %r10d,%r14d
        addl    %r13d,%r9d
        xorl    %r11d,%esi
        addl    %r9d,%ebx
        shrdl   $2,%r14d,%r14d
        addl    %esi,%r9d
        movl    %ebx,%r13d
        addl    %r9d,%r14d
        shrdl   $14,%r13d,%r13d
        movl    %r14d,%r9d
        movl    %ecx,%r12d
        xorl    %ebx,%r13d
        shrdl   $9,%r14d,%r14d
        xorl    %edx,%r12d
        shrdl   $5,%r13d,%r13d
        xorl    %r9d,%r14d
        andl    %ebx,%r12d
        vaesenclast     %xmm10,%xmm9,%xmm11
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 176-128(%rdi),%xmm10
        xorl    %ebx,%r13d
        addl    44(%rsp),%r8d
        movl    %r9d,%esi
        shrdl   $11,%r14d,%r14d
        xorl    %edx,%r12d
        xorl    %r10d,%esi
        shrdl   $6,%r13d,%r13d
        addl    %r12d,%r8d
        andl    %esi,%r15d
        xorl    %r9d,%r14d
        addl    %r13d,%r8d
        xorl    %r10d,%r15d
        addl    %r8d,%eax
        shrdl   $2,%r14d,%r14d
        addl    %r15d,%r8d
        movl    %eax,%r13d
        addl    %r8d,%r14d
        shrdl   $14,%r13d,%r13d
        movl    %r14d,%r8d
        movl    %ebx,%r12d
        xorl    %eax,%r13d
        shrdl   $9,%r14d,%r14d
        xorl    %ecx,%r12d
        shrdl   $5,%r13d,%r13d
        xorl    %r8d,%r14d
        andl    %eax,%r12d
        vpand   %xmm12,%xmm11,%xmm8
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 192-128(%rdi),%xmm10
        xorl    %eax,%r13d
        addl    48(%rsp),%edx
        movl    %r8d,%r15d
        shrdl   $11,%r14d,%r14d
        xorl    %ecx,%r12d
        xorl    %r9d,%r15d
        shrdl   $6,%r13d,%r13d
        addl    %r12d,%edx
        andl    %r15d,%esi
        xorl    %r8d,%r14d
        addl    %r13d,%edx
        xorl    %r9d,%esi
        addl    %edx,%r11d
        shrdl   $2,%r14d,%r14d
        addl    %esi,%edx
        movl    %r11d,%r13d
        addl    %edx,%r14d
        shrdl   $14,%r13d,%r13d
        movl    %r14d,%edx
        movl    %eax,%r12d
        xorl    %r11d,%r13d
        shrdl   $9,%r14d,%r14d
        xorl    %ebx,%r12d
        shrdl   $5,%r13d,%r13d
        xorl    %edx,%r14d
        andl    %r11d,%r12d
        vaesenclast     %xmm10,%xmm9,%xmm11
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 208-128(%rdi),%xmm10
        xorl    %r11d,%r13d
        addl    52(%rsp),%ecx
        movl    %edx,%esi
        shrdl   $11,%r14d,%r14d
        xorl    %ebx,%r12d
        xorl    %r8d,%esi
        shrdl   $6,%r13d,%r13d
        addl    %r12d,%ecx
        andl    %esi,%r15d
        xorl    %edx,%r14d
        addl    %r13d,%ecx
        xorl    %r8d,%r15d
        addl    %ecx,%r10d
        shrdl   $2,%r14d,%r14d
        addl    %r15d,%ecx
        movl    %r10d,%r13d
        addl    %ecx,%r14d
        shrdl   $14,%r13d,%r13d
        movl    %r14d,%ecx
        movl    %r11d,%r12d
        xorl    %r10d,%r13d
        shrdl   $9,%r14d,%r14d
        xorl    %eax,%r12d
        shrdl   $5,%r13d,%r13d
        xorl    %ecx,%r14d
        andl    %r10d,%r12d
        vpand   %xmm13,%xmm11,%xmm11
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 224-128(%rdi),%xmm10
        xorl    %r10d,%r13d
        addl    56(%rsp),%ebx
        movl    %ecx,%r15d
        shrdl   $11,%r14d,%r14d
        xorl    %eax,%r12d
        xorl    %edx,%r15d
        shrdl   $6,%r13d,%r13d
        addl    %r12d,%ebx
        andl    %r15d,%esi
        xorl    %ecx,%r14d
        addl    %r13d,%ebx
        xorl    %edx,%esi
        addl    %ebx,%r9d
        shrdl   $2,%r14d,%r14d
        addl    %esi,%ebx
        movl    %r9d,%r13d
        addl    %ebx,%r14d
        shrdl   $14,%r13d,%r13d
        movl    %r14d,%ebx
        movl    %r10d,%r12d
        xorl    %r9d,%r13d
        shrdl   $9,%r14d,%r14d
        xorl    %r11d,%r12d
        shrdl   $5,%r13d,%r13d
        xorl    %ebx,%r14d
        andl    %r9d,%r12d
        vpor    %xmm11,%xmm8,%xmm8
        vaesenclast     %xmm10,%xmm9,%xmm11
        vmovdqu 0-128(%rdi),%xmm10
        xorl    %r9d,%r13d
        addl    60(%rsp),%eax
        movl    %ebx,%esi
        shrdl   $11,%r14d,%r14d
        xorl    %r11d,%r12d
        xorl    %ecx,%esi
        shrdl   $6,%r13d,%r13d
        addl    %r12d,%eax
        andl    %esi,%r15d
        xorl    %ebx,%r14d
        addl    %r13d,%eax
        xorl    %ecx,%r15d
        addl    %eax,%r8d
        shrdl   $2,%r14d,%r14d
        addl    %r15d,%eax
        movl    %r8d,%r13d
        addl    %eax,%r14d
        movq    64+0(%rsp),%r12
        movq    64+8(%rsp),%r13
        movq    64+40(%rsp),%r15
        movq    64+48(%rsp),%rsi

        vpand   %xmm14,%xmm11,%xmm11
        movl    %r14d,%eax
        vpor    %xmm11,%xmm8,%xmm8
        vmovdqu %xmm8,(%r12,%r13,1)
        leaq    16(%r12),%r12

        addl    0(%r15),%eax
        addl    4(%r15),%ebx
        addl    8(%r15),%ecx
        addl    12(%r15),%edx
        addl    16(%r15),%r8d
        addl    20(%r15),%r9d
        addl    24(%r15),%r10d
        addl    28(%r15),%r11d

        cmpq    64+16(%rsp),%r12

        movl    %eax,0(%r15)
        movl    %ebx,4(%r15)
        movl    %ecx,8(%r15)
        movl    %edx,12(%r15)
        movl    %r8d,16(%r15)
        movl    %r9d,20(%r15)
        movl    %r10d,24(%r15)
        movl    %r11d,28(%r15)
        jb      .Lloop_avx

        movq    64+32(%rsp),%r8
        movq    120(%rsp),%rsi
.cfi_def_cfa    %rsi,8
        vmovdqu %xmm8,(%r8)
        vzeroall
        movq    -48(%rsi),%r15
.cfi_restore    %r15
        movq    -40(%rsi),%r14
.cfi_restore    %r14
        movq    -32(%rsi),%r13
.cfi_restore    %r13
        movq    -24(%rsi),%r12
.cfi_restore    %r12
        movq    -16(%rsi),%rbp
.cfi_restore    %rbp
        movq    -8(%rsi),%rbx
.cfi_restore    %rbx
        leaq    (%rsi),%rsp
.cfi_def_cfa_register   %rsp
.Lepilogue_avx:
        .byte   0xf3,0xc3
.cfi_endproc
.size   aesni_cbc_sha256_enc_avx,.-aesni_cbc_sha256_enc_avx
.type   aesni_cbc_sha256_enc_avx2,@function
.align  64
aesni_cbc_sha256_enc_avx2:
.cfi_startproc
.Lavx2_shortcut:
        movq    8(%rsp),%r10
        movq    %rsp,%rax
.cfi_def_cfa_register   %rax
        pushq   %rbx
.cfi_offset     %rbx,-16
        pushq   %rbp
.cfi_offset     %rbp,-24
        pushq   %r12
.cfi_offset     %r12,-32
        pushq   %r13
.cfi_offset     %r13,-40
        pushq   %r14
.cfi_offset     %r14,-48
        pushq   %r15
.cfi_offset     %r15,-56
        subq    $576,%rsp
        andq    $-1024,%rsp
        addq    $448,%rsp

        shlq    $6,%rdx
        subq    %rdi,%rsi
        subq    %rdi,%r10
        addq    %rdi,%rdx



        movq    %rdx,64+16(%rsp)

        movq    %r8,64+32(%rsp)
        movq    %r9,64+40(%rsp)
        movq    %r10,64+48(%rsp)
        movq    %rax,120(%rsp)
.cfi_escape     0x0f,0x06,0x77,0xf8,0x00,0x06,0x23,0x08
.Lprologue_avx2:
        vzeroall

        movq    %rdi,%r13
        vpinsrq $1,%rsi,%xmm15,%xmm15
        leaq    128(%rcx),%rdi
        leaq    K256+544(%rip),%r12
        movl    240-128(%rdi),%r14d
        movq    %r9,%r15
        movq    %r10,%rsi
        vmovdqu (%r8),%xmm8
        leaq    -9(%r14),%r14

        vmovdqa 0(%r12,%r14,8),%xmm14
        vmovdqa 16(%r12,%r14,8),%xmm13
        vmovdqa 32(%r12,%r14,8),%xmm12

        subq    $-64,%r13
        movl    0(%r15),%eax
        leaq    (%rsi,%r13,1),%r12
        movl    4(%r15),%ebx
        cmpq    %rdx,%r13
        movl    8(%r15),%ecx
        cmoveq  %rsp,%r12
        movl    12(%r15),%edx
        movl    16(%r15),%r8d
        movl    20(%r15),%r9d
        movl    24(%r15),%r10d
        movl    28(%r15),%r11d
        vmovdqu 0-128(%rdi),%xmm10
        jmp     .Loop_avx2
.align  16
.Loop_avx2:
        vmovdqa K256+512(%rip),%ymm7
        vmovdqu -64+0(%rsi,%r13,1),%xmm0
        vmovdqu -64+16(%rsi,%r13,1),%xmm1
        vmovdqu -64+32(%rsi,%r13,1),%xmm2
        vmovdqu -64+48(%rsi,%r13,1),%xmm3

        vinserti128     $1,(%r12),%ymm0,%ymm0
        vinserti128     $1,16(%r12),%ymm1,%ymm1
        vpshufb %ymm7,%ymm0,%ymm0
        vinserti128     $1,32(%r12),%ymm2,%ymm2
        vpshufb %ymm7,%ymm1,%ymm1
        vinserti128     $1,48(%r12),%ymm3,%ymm3

        leaq    K256(%rip),%rbp
        vpshufb %ymm7,%ymm2,%ymm2
        leaq    -64(%r13),%r13
        vpaddd  0(%rbp),%ymm0,%ymm4
        vpshufb %ymm7,%ymm3,%ymm3
        vpaddd  32(%rbp),%ymm1,%ymm5
        vpaddd  64(%rbp),%ymm2,%ymm6
        vpaddd  96(%rbp),%ymm3,%ymm7
        vmovdqa %ymm4,0(%rsp)
        xorl    %r14d,%r14d
        vmovdqa %ymm5,32(%rsp)

        movq    120(%rsp),%rsi
.cfi_def_cfa    %rsi,8
        leaq    -64(%rsp),%rsp



        movq    %rsi,-8(%rsp)
.cfi_escape     0x0f,0x05,0x77,0x78,0x06,0x23,0x08
        movl    %ebx,%esi
        vmovdqa %ymm6,0(%rsp)
        xorl    %ecx,%esi
        vmovdqa %ymm7,32(%rsp)
        movl    %r9d,%r12d
        subq    $-32*4,%rbp
        jmp     .Lavx2_00_47

.align  16
.Lavx2_00_47:
        vmovdqu (%r13),%xmm9
        vpinsrq $0,%r13,%xmm15,%xmm15
        leaq    -64(%rsp),%rsp
.cfi_escape     0x0f,0x05,0x77,0x38,0x06,0x23,0x08

        pushq   64-8(%rsp)
.cfi_escape     0x0f,0x05,0x77,0x00,0x06,0x23,0x08
        leaq    8(%rsp),%rsp
.cfi_escape     0x0f,0x05,0x77,0x78,0x06,0x23,0x08
        vpalignr        $4,%ymm0,%ymm1,%ymm4
        addl    0+128(%rsp),%r11d
        andl    %r8d,%r12d
        rorxl   $25,%r8d,%r13d
        vpalignr        $4,%ymm2,%ymm3,%ymm7
        rorxl   $11,%r8d,%r15d
        leal    (%rax,%r14,1),%eax
        leal    (%r11,%r12,1),%r11d
        vpsrld  $7,%ymm4,%ymm6
        andnl   %r10d,%r8d,%r12d
        xorl    %r15d,%r13d
        rorxl   $6,%r8d,%r14d
        vpaddd  %ymm7,%ymm0,%ymm0
        leal    (%r11,%r12,1),%r11d
        xorl    %r14d,%r13d
        movl    %eax,%r15d
        vpsrld  $3,%ymm4,%ymm7
        rorxl   $22,%eax,%r12d
        leal    (%r11,%r13,1),%r11d
        xorl    %ebx,%r15d
        vpslld  $14,%ymm4,%ymm5
        rorxl   $13,%eax,%r14d
        rorxl   $2,%eax,%r13d
        leal    (%rdx,%r11,1),%edx
        vpxor   %ymm6,%ymm7,%ymm4
        andl    %r15d,%esi
        vpxor   %xmm10,%xmm9,%xmm9
        vmovdqu 16-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %ebx,%esi
        vpshufd $250,%ymm3,%ymm7
        xorl    %r13d,%r14d
        leal    (%r11,%rsi,1),%r11d
        movl    %r8d,%r12d
        vpsrld  $11,%ymm6,%ymm6
        addl    4+128(%rsp),%r10d
        andl    %edx,%r12d
        rorxl   $25,%edx,%r13d
        vpxor   %ymm5,%ymm4,%ymm4
        rorxl   $11,%edx,%esi
        leal    (%r11,%r14,1),%r11d
        leal    (%r10,%r12,1),%r10d
        vpslld  $11,%ymm5,%ymm5
        andnl   %r9d,%edx,%r12d
        xorl    %esi,%r13d
        rorxl   $6,%edx,%r14d
        vpxor   %ymm6,%ymm4,%ymm4
        leal    (%r10,%r12,1),%r10d
        xorl    %r14d,%r13d
        movl    %r11d,%esi
        vpsrld  $10,%ymm7,%ymm6
        rorxl   $22,%r11d,%r12d
        leal    (%r10,%r13,1),%r10d
        xorl    %eax,%esi
        vpxor   %ymm5,%ymm4,%ymm4
        rorxl   $13,%r11d,%r14d
        rorxl   $2,%r11d,%r13d
        leal    (%rcx,%r10,1),%ecx
        vpsrlq  $17,%ymm7,%ymm7
        andl    %esi,%r15d
        vpxor   %xmm8,%xmm9,%xmm9
        xorl    %r12d,%r14d
        xorl    %eax,%r15d
        vpaddd  %ymm4,%ymm0,%ymm0
        xorl    %r13d,%r14d
        leal    (%r10,%r15,1),%r10d
        movl    %edx,%r12d
        vpxor   %ymm7,%ymm6,%ymm6
        addl    8+128(%rsp),%r9d
        andl    %ecx,%r12d
        rorxl   $25,%ecx,%r13d
        vpsrlq  $2,%ymm7,%ymm7
        rorxl   $11,%ecx,%r15d
        leal    (%r10,%r14,1),%r10d
        leal    (%r9,%r12,1),%r9d
        vpxor   %ymm7,%ymm6,%ymm6
        andnl   %r8d,%ecx,%r12d
        xorl    %r15d,%r13d
        rorxl   $6,%ecx,%r14d
        vpshufd $132,%ymm6,%ymm6
        leal    (%r9,%r12,1),%r9d
        xorl    %r14d,%r13d
        movl    %r10d,%r15d
        vpsrldq $8,%ymm6,%ymm6
        rorxl   $22,%r10d,%r12d
        leal    (%r9,%r13,1),%r9d
        xorl    %r11d,%r15d
        vpaddd  %ymm6,%ymm0,%ymm0
        rorxl   $13,%r10d,%r14d
        rorxl   $2,%r10d,%r13d
        leal    (%rbx,%r9,1),%ebx
        vpshufd $80,%ymm0,%ymm7
        andl    %r15d,%esi
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 32-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %r11d,%esi
        vpsrld  $10,%ymm7,%ymm6
        xorl    %r13d,%r14d
        leal    (%r9,%rsi,1),%r9d
        movl    %ecx,%r12d
        vpsrlq  $17,%ymm7,%ymm7
        addl    12+128(%rsp),%r8d
        andl    %ebx,%r12d
        rorxl   $25,%ebx,%r13d
        vpxor   %ymm7,%ymm6,%ymm6
        rorxl   $11,%ebx,%esi
        leal    (%r9,%r14,1),%r9d
        leal    (%r8,%r12,1),%r8d
        vpsrlq  $2,%ymm7,%ymm7
        andnl   %edx,%ebx,%r12d
        xorl    %esi,%r13d
        rorxl   $6,%ebx,%r14d
        vpxor   %ymm7,%ymm6,%ymm6
        leal    (%r8,%r12,1),%r8d
        xorl    %r14d,%r13d
        movl    %r9d,%esi
        vpshufd $232,%ymm6,%ymm6
        rorxl   $22,%r9d,%r12d
        leal    (%r8,%r13,1),%r8d
        xorl    %r10d,%esi
        vpslldq $8,%ymm6,%ymm6
        rorxl   $13,%r9d,%r14d
        rorxl   $2,%r9d,%r13d
        leal    (%rax,%r8,1),%eax
        vpaddd  %ymm6,%ymm0,%ymm0
        andl    %esi,%r15d
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 48-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %r10d,%r15d
        vpaddd  0(%rbp),%ymm0,%ymm6
        xorl    %r13d,%r14d
        leal    (%r8,%r15,1),%r8d
        movl    %ebx,%r12d
        vmovdqa %ymm6,0(%rsp)
        vpalignr        $4,%ymm1,%ymm2,%ymm4
        addl    32+128(%rsp),%edx
        andl    %eax,%r12d
        rorxl   $25,%eax,%r13d
        vpalignr        $4,%ymm3,%ymm0,%ymm7
        rorxl   $11,%eax,%r15d
        leal    (%r8,%r14,1),%r8d
        leal    (%rdx,%r12,1),%edx
        vpsrld  $7,%ymm4,%ymm6
        andnl   %ecx,%eax,%r12d
        xorl    %r15d,%r13d
        rorxl   $6,%eax,%r14d
        vpaddd  %ymm7,%ymm1,%ymm1
        leal    (%rdx,%r12,1),%edx
        xorl    %r14d,%r13d
        movl    %r8d,%r15d
        vpsrld  $3,%ymm4,%ymm7
        rorxl   $22,%r8d,%r12d
        leal    (%rdx,%r13,1),%edx
        xorl    %r9d,%r15d
        vpslld  $14,%ymm4,%ymm5
        rorxl   $13,%r8d,%r14d
        rorxl   $2,%r8d,%r13d
        leal    (%r11,%rdx,1),%r11d
        vpxor   %ymm6,%ymm7,%ymm4
        andl    %r15d,%esi
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 64-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %r9d,%esi
        vpshufd $250,%ymm0,%ymm7
        xorl    %r13d,%r14d
        leal    (%rdx,%rsi,1),%edx
        movl    %eax,%r12d
        vpsrld  $11,%ymm6,%ymm6
        addl    36+128(%rsp),%ecx
        andl    %r11d,%r12d
        rorxl   $25,%r11d,%r13d
        vpxor   %ymm5,%ymm4,%ymm4
        rorxl   $11,%r11d,%esi
        leal    (%rdx,%r14,1),%edx
        leal    (%rcx,%r12,1),%ecx
        vpslld  $11,%ymm5,%ymm5
        andnl   %ebx,%r11d,%r12d
        xorl    %esi,%r13d
        rorxl   $6,%r11d,%r14d
        vpxor   %ymm6,%ymm4,%ymm4
        leal    (%rcx,%r12,1),%ecx
        xorl    %r14d,%r13d
        movl    %edx,%esi
        vpsrld  $10,%ymm7,%ymm6
        rorxl   $22,%edx,%r12d
        leal    (%rcx,%r13,1),%ecx
        xorl    %r8d,%esi
        vpxor   %ymm5,%ymm4,%ymm4
        rorxl   $13,%edx,%r14d
        rorxl   $2,%edx,%r13d
        leal    (%r10,%rcx,1),%r10d
        vpsrlq  $17,%ymm7,%ymm7
        andl    %esi,%r15d
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 80-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %r8d,%r15d
        vpaddd  %ymm4,%ymm1,%ymm1
        xorl    %r13d,%r14d
        leal    (%rcx,%r15,1),%ecx
        movl    %r11d,%r12d
        vpxor   %ymm7,%ymm6,%ymm6
        addl    40+128(%rsp),%ebx
        andl    %r10d,%r12d
        rorxl   $25,%r10d,%r13d
        vpsrlq  $2,%ymm7,%ymm7
        rorxl   $11,%r10d,%r15d
        leal    (%rcx,%r14,1),%ecx
        leal    (%rbx,%r12,1),%ebx
        vpxor   %ymm7,%ymm6,%ymm6
        andnl   %eax,%r10d,%r12d
        xorl    %r15d,%r13d
        rorxl   $6,%r10d,%r14d
        vpshufd $132,%ymm6,%ymm6
        leal    (%rbx,%r12,1),%ebx
        xorl    %r14d,%r13d
        movl    %ecx,%r15d
        vpsrldq $8,%ymm6,%ymm6
        rorxl   $22,%ecx,%r12d
        leal    (%rbx,%r13,1),%ebx
        xorl    %edx,%r15d
        vpaddd  %ymm6,%ymm1,%ymm1
        rorxl   $13,%ecx,%r14d
        rorxl   $2,%ecx,%r13d
        leal    (%r9,%rbx,1),%r9d
        vpshufd $80,%ymm1,%ymm7
        andl    %r15d,%esi
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 96-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %edx,%esi
        vpsrld  $10,%ymm7,%ymm6
        xorl    %r13d,%r14d
        leal    (%rbx,%rsi,1),%ebx
        movl    %r10d,%r12d
        vpsrlq  $17,%ymm7,%ymm7
        addl    44+128(%rsp),%eax
        andl    %r9d,%r12d
        rorxl   $25,%r9d,%r13d
        vpxor   %ymm7,%ymm6,%ymm6
        rorxl   $11,%r9d,%esi
        leal    (%rbx,%r14,1),%ebx
        leal    (%rax,%r12,1),%eax
        vpsrlq  $2,%ymm7,%ymm7
        andnl   %r11d,%r9d,%r12d
        xorl    %esi,%r13d
        rorxl   $6,%r9d,%r14d
        vpxor   %ymm7,%ymm6,%ymm6
        leal    (%rax,%r12,1),%eax
        xorl    %r14d,%r13d
        movl    %ebx,%esi
        vpshufd $232,%ymm6,%ymm6
        rorxl   $22,%ebx,%r12d
        leal    (%rax,%r13,1),%eax
        xorl    %ecx,%esi
        vpslldq $8,%ymm6,%ymm6
        rorxl   $13,%ebx,%r14d
        rorxl   $2,%ebx,%r13d
        leal    (%r8,%rax,1),%r8d
        vpaddd  %ymm6,%ymm1,%ymm1
        andl    %esi,%r15d
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 112-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %ecx,%r15d
        vpaddd  32(%rbp),%ymm1,%ymm6
        xorl    %r13d,%r14d
        leal    (%rax,%r15,1),%eax
        movl    %r9d,%r12d
        vmovdqa %ymm6,32(%rsp)
        leaq    -64(%rsp),%rsp
.cfi_escape     0x0f,0x05,0x77,0x38,0x06,0x23,0x08

        pushq   64-8(%rsp)
.cfi_escape     0x0f,0x05,0x77,0x00,0x06,0x23,0x08
        leaq    8(%rsp),%rsp
.cfi_escape     0x0f,0x05,0x77,0x78,0x06,0x23,0x08
        vpalignr        $4,%ymm2,%ymm3,%ymm4
        addl    0+128(%rsp),%r11d
        andl    %r8d,%r12d
        rorxl   $25,%r8d,%r13d
        vpalignr        $4,%ymm0,%ymm1,%ymm7
        rorxl   $11,%r8d,%r15d
        leal    (%rax,%r14,1),%eax
        leal    (%r11,%r12,1),%r11d
        vpsrld  $7,%ymm4,%ymm6
        andnl   %r10d,%r8d,%r12d
        xorl    %r15d,%r13d
        rorxl   $6,%r8d,%r14d
        vpaddd  %ymm7,%ymm2,%ymm2
        leal    (%r11,%r12,1),%r11d
        xorl    %r14d,%r13d
        movl    %eax,%r15d
        vpsrld  $3,%ymm4,%ymm7
        rorxl   $22,%eax,%r12d
        leal    (%r11,%r13,1),%r11d
        xorl    %ebx,%r15d
        vpslld  $14,%ymm4,%ymm5
        rorxl   $13,%eax,%r14d
        rorxl   $2,%eax,%r13d
        leal    (%rdx,%r11,1),%edx
        vpxor   %ymm6,%ymm7,%ymm4
        andl    %r15d,%esi
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 128-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %ebx,%esi
        vpshufd $250,%ymm1,%ymm7
        xorl    %r13d,%r14d
        leal    (%r11,%rsi,1),%r11d
        movl    %r8d,%r12d
        vpsrld  $11,%ymm6,%ymm6
        addl    4+128(%rsp),%r10d
        andl    %edx,%r12d
        rorxl   $25,%edx,%r13d
        vpxor   %ymm5,%ymm4,%ymm4
        rorxl   $11,%edx,%esi
        leal    (%r11,%r14,1),%r11d
        leal    (%r10,%r12,1),%r10d
        vpslld  $11,%ymm5,%ymm5
        andnl   %r9d,%edx,%r12d
        xorl    %esi,%r13d
        rorxl   $6,%edx,%r14d
        vpxor   %ymm6,%ymm4,%ymm4
        leal    (%r10,%r12,1),%r10d
        xorl    %r14d,%r13d
        movl    %r11d,%esi
        vpsrld  $10,%ymm7,%ymm6
        rorxl   $22,%r11d,%r12d
        leal    (%r10,%r13,1),%r10d
        xorl    %eax,%esi
        vpxor   %ymm5,%ymm4,%ymm4
        rorxl   $13,%r11d,%r14d
        rorxl   $2,%r11d,%r13d
        leal    (%rcx,%r10,1),%ecx
        vpsrlq  $17,%ymm7,%ymm7
        andl    %esi,%r15d
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 144-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %eax,%r15d
        vpaddd  %ymm4,%ymm2,%ymm2
        xorl    %r13d,%r14d
        leal    (%r10,%r15,1),%r10d
        movl    %edx,%r12d
        vpxor   %ymm7,%ymm6,%ymm6
        addl    8+128(%rsp),%r9d
        andl    %ecx,%r12d
        rorxl   $25,%ecx,%r13d
        vpsrlq  $2,%ymm7,%ymm7
        rorxl   $11,%ecx,%r15d
        leal    (%r10,%r14,1),%r10d
        leal    (%r9,%r12,1),%r9d
        vpxor   %ymm7,%ymm6,%ymm6
        andnl   %r8d,%ecx,%r12d
        xorl    %r15d,%r13d
        rorxl   $6,%ecx,%r14d
        vpshufd $132,%ymm6,%ymm6
        leal    (%r9,%r12,1),%r9d
        xorl    %r14d,%r13d
        movl    %r10d,%r15d
        vpsrldq $8,%ymm6,%ymm6
        rorxl   $22,%r10d,%r12d
        leal    (%r9,%r13,1),%r9d
        xorl    %r11d,%r15d
        vpaddd  %ymm6,%ymm2,%ymm2
        rorxl   $13,%r10d,%r14d
        rorxl   $2,%r10d,%r13d
        leal    (%rbx,%r9,1),%ebx
        vpshufd $80,%ymm2,%ymm7
        andl    %r15d,%esi
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 160-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %r11d,%esi
        vpsrld  $10,%ymm7,%ymm6
        xorl    %r13d,%r14d
        leal    (%r9,%rsi,1),%r9d
        movl    %ecx,%r12d
        vpsrlq  $17,%ymm7,%ymm7
        addl    12+128(%rsp),%r8d
        andl    %ebx,%r12d
        rorxl   $25,%ebx,%r13d
        vpxor   %ymm7,%ymm6,%ymm6
        rorxl   $11,%ebx,%esi
        leal    (%r9,%r14,1),%r9d
        leal    (%r8,%r12,1),%r8d
        vpsrlq  $2,%ymm7,%ymm7
        andnl   %edx,%ebx,%r12d
        xorl    %esi,%r13d
        rorxl   $6,%ebx,%r14d
        vpxor   %ymm7,%ymm6,%ymm6
        leal    (%r8,%r12,1),%r8d
        xorl    %r14d,%r13d
        movl    %r9d,%esi
        vpshufd $232,%ymm6,%ymm6
        rorxl   $22,%r9d,%r12d
        leal    (%r8,%r13,1),%r8d
        xorl    %r10d,%esi
        vpslldq $8,%ymm6,%ymm6
        rorxl   $13,%r9d,%r14d
        rorxl   $2,%r9d,%r13d
        leal    (%rax,%r8,1),%eax
        vpaddd  %ymm6,%ymm2,%ymm2
        andl    %esi,%r15d
        vaesenclast     %xmm10,%xmm9,%xmm11
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 176-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %r10d,%r15d
        vpaddd  64(%rbp),%ymm2,%ymm6
        xorl    %r13d,%r14d
        leal    (%r8,%r15,1),%r8d
        movl    %ebx,%r12d
        vmovdqa %ymm6,0(%rsp)
        vpalignr        $4,%ymm3,%ymm0,%ymm4
        addl    32+128(%rsp),%edx
        andl    %eax,%r12d
        rorxl   $25,%eax,%r13d
        vpalignr        $4,%ymm1,%ymm2,%ymm7
        rorxl   $11,%eax,%r15d
        leal    (%r8,%r14,1),%r8d
        leal    (%rdx,%r12,1),%edx
        vpsrld  $7,%ymm4,%ymm6
        andnl   %ecx,%eax,%r12d
        xorl    %r15d,%r13d
        rorxl   $6,%eax,%r14d
        vpaddd  %ymm7,%ymm3,%ymm3
        leal    (%rdx,%r12,1),%edx
        xorl    %r14d,%r13d
        movl    %r8d,%r15d
        vpsrld  $3,%ymm4,%ymm7
        rorxl   $22,%r8d,%r12d
        leal    (%rdx,%r13,1),%edx
        xorl    %r9d,%r15d
        vpslld  $14,%ymm4,%ymm5
        rorxl   $13,%r8d,%r14d
        rorxl   $2,%r8d,%r13d
        leal    (%r11,%rdx,1),%r11d
        vpxor   %ymm6,%ymm7,%ymm4
        andl    %r15d,%esi
        vpand   %xmm12,%xmm11,%xmm8
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 192-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %r9d,%esi
        vpshufd $250,%ymm2,%ymm7
        xorl    %r13d,%r14d
        leal    (%rdx,%rsi,1),%edx
        movl    %eax,%r12d
        vpsrld  $11,%ymm6,%ymm6
        addl    36+128(%rsp),%ecx
        andl    %r11d,%r12d
        rorxl   $25,%r11d,%r13d
        vpxor   %ymm5,%ymm4,%ymm4
        rorxl   $11,%r11d,%esi
        leal    (%rdx,%r14,1),%edx
        leal    (%rcx,%r12,1),%ecx
        vpslld  $11,%ymm5,%ymm5
        andnl   %ebx,%r11d,%r12d
        xorl    %esi,%r13d
        rorxl   $6,%r11d,%r14d
        vpxor   %ymm6,%ymm4,%ymm4
        leal    (%rcx,%r12,1),%ecx
        xorl    %r14d,%r13d
        movl    %edx,%esi
        vpsrld  $10,%ymm7,%ymm6
        rorxl   $22,%edx,%r12d
        leal    (%rcx,%r13,1),%ecx
        xorl    %r8d,%esi
        vpxor   %ymm5,%ymm4,%ymm4
        rorxl   $13,%edx,%r14d
        rorxl   $2,%edx,%r13d
        leal    (%r10,%rcx,1),%r10d
        vpsrlq  $17,%ymm7,%ymm7
        andl    %esi,%r15d
        vaesenclast     %xmm10,%xmm9,%xmm11
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 208-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %r8d,%r15d
        vpaddd  %ymm4,%ymm3,%ymm3
        xorl    %r13d,%r14d
        leal    (%rcx,%r15,1),%ecx
        movl    %r11d,%r12d
        vpxor   %ymm7,%ymm6,%ymm6
        addl    40+128(%rsp),%ebx
        andl    %r10d,%r12d
        rorxl   $25,%r10d,%r13d
        vpsrlq  $2,%ymm7,%ymm7
        rorxl   $11,%r10d,%r15d
        leal    (%rcx,%r14,1),%ecx
        leal    (%rbx,%r12,1),%ebx
        vpxor   %ymm7,%ymm6,%ymm6
        andnl   %eax,%r10d,%r12d
        xorl    %r15d,%r13d
        rorxl   $6,%r10d,%r14d
        vpshufd $132,%ymm6,%ymm6
        leal    (%rbx,%r12,1),%ebx
        xorl    %r14d,%r13d
        movl    %ecx,%r15d
        vpsrldq $8,%ymm6,%ymm6
        rorxl   $22,%ecx,%r12d
        leal    (%rbx,%r13,1),%ebx
        xorl    %edx,%r15d
        vpaddd  %ymm6,%ymm3,%ymm3
        rorxl   $13,%ecx,%r14d
        rorxl   $2,%ecx,%r13d
        leal    (%r9,%rbx,1),%r9d
        vpshufd $80,%ymm3,%ymm7
        andl    %r15d,%esi
        vpand   %xmm13,%xmm11,%xmm11
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 224-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %edx,%esi
        vpsrld  $10,%ymm7,%ymm6
        xorl    %r13d,%r14d
        leal    (%rbx,%rsi,1),%ebx
        movl    %r10d,%r12d
        vpsrlq  $17,%ymm7,%ymm7
        addl    44+128(%rsp),%eax
        andl    %r9d,%r12d
        rorxl   $25,%r9d,%r13d
        vpxor   %ymm7,%ymm6,%ymm6
        rorxl   $11,%r9d,%esi
        leal    (%rbx,%r14,1),%ebx
        leal    (%rax,%r12,1),%eax
        vpsrlq  $2,%ymm7,%ymm7
        andnl   %r11d,%r9d,%r12d
        xorl    %esi,%r13d
        rorxl   $6,%r9d,%r14d
        vpxor   %ymm7,%ymm6,%ymm6
        leal    (%rax,%r12,1),%eax
        xorl    %r14d,%r13d
        movl    %ebx,%esi
        vpshufd $232,%ymm6,%ymm6
        rorxl   $22,%ebx,%r12d
        leal    (%rax,%r13,1),%eax
        xorl    %ecx,%esi
        vpslldq $8,%ymm6,%ymm6
        rorxl   $13,%ebx,%r14d
        rorxl   $2,%ebx,%r13d
        leal    (%r8,%rax,1),%r8d
        vpaddd  %ymm6,%ymm3,%ymm3
        andl    %esi,%r15d
        vpor    %xmm11,%xmm8,%xmm8
        vaesenclast     %xmm10,%xmm9,%xmm11
        vmovdqu 0-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %ecx,%r15d
        vpaddd  96(%rbp),%ymm3,%ymm6
        xorl    %r13d,%r14d
        leal    (%rax,%r15,1),%eax
        movl    %r9d,%r12d
        vmovdqa %ymm6,32(%rsp)
        vmovq   %xmm15,%r13
        vpextrq $1,%xmm15,%r15
        vpand   %xmm14,%xmm11,%xmm11
        vpor    %xmm11,%xmm8,%xmm8
        vmovdqu %xmm8,(%r15,%r13,1)
        leaq    16(%r13),%r13
        leaq    128(%rbp),%rbp
        cmpb    $0,3(%rbp)
        jne     .Lavx2_00_47
        vmovdqu (%r13),%xmm9
        vpinsrq $0,%r13,%xmm15,%xmm15
        addl    0+64(%rsp),%r11d
        andl    %r8d,%r12d
        rorxl   $25,%r8d,%r13d
        rorxl   $11,%r8d,%r15d
        leal    (%rax,%r14,1),%eax
        leal    (%r11,%r12,1),%r11d
        andnl   %r10d,%r8d,%r12d
        xorl    %r15d,%r13d
        rorxl   $6,%r8d,%r14d
        leal    (%r11,%r12,1),%r11d
        xorl    %r14d,%r13d
        movl    %eax,%r15d
        rorxl   $22,%eax,%r12d
        leal    (%r11,%r13,1),%r11d
        xorl    %ebx,%r15d
        rorxl   $13,%eax,%r14d
        rorxl   $2,%eax,%r13d
        leal    (%rdx,%r11,1),%edx
        andl    %r15d,%esi
        vpxor   %xmm10,%xmm9,%xmm9
        vmovdqu 16-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %ebx,%esi
        xorl    %r13d,%r14d
        leal    (%r11,%rsi,1),%r11d
        movl    %r8d,%r12d
        addl    4+64(%rsp),%r10d
        andl    %edx,%r12d
        rorxl   $25,%edx,%r13d
        rorxl   $11,%edx,%esi
        leal    (%r11,%r14,1),%r11d
        leal    (%r10,%r12,1),%r10d
        andnl   %r9d,%edx,%r12d
        xorl    %esi,%r13d
        rorxl   $6,%edx,%r14d
        leal    (%r10,%r12,1),%r10d
        xorl    %r14d,%r13d
        movl    %r11d,%esi
        rorxl   $22,%r11d,%r12d
        leal    (%r10,%r13,1),%r10d
        xorl    %eax,%esi
        rorxl   $13,%r11d,%r14d
        rorxl   $2,%r11d,%r13d
        leal    (%rcx,%r10,1),%ecx
        andl    %esi,%r15d
        vpxor   %xmm8,%xmm9,%xmm9
        xorl    %r12d,%r14d
        xorl    %eax,%r15d
        xorl    %r13d,%r14d
        leal    (%r10,%r15,1),%r10d
        movl    %edx,%r12d
        addl    8+64(%rsp),%r9d
        andl    %ecx,%r12d
        rorxl   $25,%ecx,%r13d
        rorxl   $11,%ecx,%r15d
        leal    (%r10,%r14,1),%r10d
        leal    (%r9,%r12,1),%r9d
        andnl   %r8d,%ecx,%r12d
        xorl    %r15d,%r13d
        rorxl   $6,%ecx,%r14d
        leal    (%r9,%r12,1),%r9d
        xorl    %r14d,%r13d
        movl    %r10d,%r15d
        rorxl   $22,%r10d,%r12d
        leal    (%r9,%r13,1),%r9d
        xorl    %r11d,%r15d
        rorxl   $13,%r10d,%r14d
        rorxl   $2,%r10d,%r13d
        leal    (%rbx,%r9,1),%ebx
        andl    %r15d,%esi
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 32-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %r11d,%esi
        xorl    %r13d,%r14d
        leal    (%r9,%rsi,1),%r9d
        movl    %ecx,%r12d
        addl    12+64(%rsp),%r8d
        andl    %ebx,%r12d
        rorxl   $25,%ebx,%r13d
        rorxl   $11,%ebx,%esi
        leal    (%r9,%r14,1),%r9d
        leal    (%r8,%r12,1),%r8d
        andnl   %edx,%ebx,%r12d
        xorl    %esi,%r13d
        rorxl   $6,%ebx,%r14d
        leal    (%r8,%r12,1),%r8d
        xorl    %r14d,%r13d
        movl    %r9d,%esi
        rorxl   $22,%r9d,%r12d
        leal    (%r8,%r13,1),%r8d
        xorl    %r10d,%esi
        rorxl   $13,%r9d,%r14d
        rorxl   $2,%r9d,%r13d
        leal    (%rax,%r8,1),%eax
        andl    %esi,%r15d
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 48-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %r10d,%r15d
        xorl    %r13d,%r14d
        leal    (%r8,%r15,1),%r8d
        movl    %ebx,%r12d
        addl    32+64(%rsp),%edx
        andl    %eax,%r12d
        rorxl   $25,%eax,%r13d
        rorxl   $11,%eax,%r15d
        leal    (%r8,%r14,1),%r8d
        leal    (%rdx,%r12,1),%edx
        andnl   %ecx,%eax,%r12d
        xorl    %r15d,%r13d
        rorxl   $6,%eax,%r14d
        leal    (%rdx,%r12,1),%edx
        xorl    %r14d,%r13d
        movl    %r8d,%r15d
        rorxl   $22,%r8d,%r12d
        leal    (%rdx,%r13,1),%edx
        xorl    %r9d,%r15d
        rorxl   $13,%r8d,%r14d
        rorxl   $2,%r8d,%r13d
        leal    (%r11,%rdx,1),%r11d
        andl    %r15d,%esi
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 64-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %r9d,%esi
        xorl    %r13d,%r14d
        leal    (%rdx,%rsi,1),%edx
        movl    %eax,%r12d
        addl    36+64(%rsp),%ecx
        andl    %r11d,%r12d
        rorxl   $25,%r11d,%r13d
        rorxl   $11,%r11d,%esi
        leal    (%rdx,%r14,1),%edx
        leal    (%rcx,%r12,1),%ecx
        andnl   %ebx,%r11d,%r12d
        xorl    %esi,%r13d
        rorxl   $6,%r11d,%r14d
        leal    (%rcx,%r12,1),%ecx
        xorl    %r14d,%r13d
        movl    %edx,%esi
        rorxl   $22,%edx,%r12d
        leal    (%rcx,%r13,1),%ecx
        xorl    %r8d,%esi
        rorxl   $13,%edx,%r14d
        rorxl   $2,%edx,%r13d
        leal    (%r10,%rcx,1),%r10d
        andl    %esi,%r15d
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 80-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %r8d,%r15d
        xorl    %r13d,%r14d
        leal    (%rcx,%r15,1),%ecx
        movl    %r11d,%r12d
        addl    40+64(%rsp),%ebx
        andl    %r10d,%r12d
        rorxl   $25,%r10d,%r13d
        rorxl   $11,%r10d,%r15d
        leal    (%rcx,%r14,1),%ecx
        leal    (%rbx,%r12,1),%ebx
        andnl   %eax,%r10d,%r12d
        xorl    %r15d,%r13d
        rorxl   $6,%r10d,%r14d
        leal    (%rbx,%r12,1),%ebx
        xorl    %r14d,%r13d
        movl    %ecx,%r15d
        rorxl   $22,%ecx,%r12d
        leal    (%rbx,%r13,1),%ebx
        xorl    %edx,%r15d
        rorxl   $13,%ecx,%r14d
        rorxl   $2,%ecx,%r13d
        leal    (%r9,%rbx,1),%r9d
        andl    %r15d,%esi
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 96-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %edx,%esi
        xorl    %r13d,%r14d
        leal    (%rbx,%rsi,1),%ebx
        movl    %r10d,%r12d
        addl    44+64(%rsp),%eax
        andl    %r9d,%r12d
        rorxl   $25,%r9d,%r13d
        rorxl   $11,%r9d,%esi
        leal    (%rbx,%r14,1),%ebx
        leal    (%rax,%r12,1),%eax
        andnl   %r11d,%r9d,%r12d
        xorl    %esi,%r13d
        rorxl   $6,%r9d,%r14d
        leal    (%rax,%r12,1),%eax
        xorl    %r14d,%r13d
        movl    %ebx,%esi
        rorxl   $22,%ebx,%r12d
        leal    (%rax,%r13,1),%eax
        xorl    %ecx,%esi
        rorxl   $13,%ebx,%r14d
        rorxl   $2,%ebx,%r13d
        leal    (%r8,%rax,1),%r8d
        andl    %esi,%r15d
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 112-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %ecx,%r15d
        xorl    %r13d,%r14d
        leal    (%rax,%r15,1),%eax
        movl    %r9d,%r12d
        addl    0(%rsp),%r11d
        andl    %r8d,%r12d
        rorxl   $25,%r8d,%r13d
        rorxl   $11,%r8d,%r15d
        leal    (%rax,%r14,1),%eax
        leal    (%r11,%r12,1),%r11d
        andnl   %r10d,%r8d,%r12d
        xorl    %r15d,%r13d
        rorxl   $6,%r8d,%r14d
        leal    (%r11,%r12,1),%r11d
        xorl    %r14d,%r13d
        movl    %eax,%r15d
        rorxl   $22,%eax,%r12d
        leal    (%r11,%r13,1),%r11d
        xorl    %ebx,%r15d
        rorxl   $13,%eax,%r14d
        rorxl   $2,%eax,%r13d
        leal    (%rdx,%r11,1),%edx
        andl    %r15d,%esi
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 128-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %ebx,%esi
        xorl    %r13d,%r14d
        leal    (%r11,%rsi,1),%r11d
        movl    %r8d,%r12d
        addl    4(%rsp),%r10d
        andl    %edx,%r12d
        rorxl   $25,%edx,%r13d
        rorxl   $11,%edx,%esi
        leal    (%r11,%r14,1),%r11d
        leal    (%r10,%r12,1),%r10d
        andnl   %r9d,%edx,%r12d
        xorl    %esi,%r13d
        rorxl   $6,%edx,%r14d
        leal    (%r10,%r12,1),%r10d
        xorl    %r14d,%r13d
        movl    %r11d,%esi
        rorxl   $22,%r11d,%r12d
        leal    (%r10,%r13,1),%r10d
        xorl    %eax,%esi
        rorxl   $13,%r11d,%r14d
        rorxl   $2,%r11d,%r13d
        leal    (%rcx,%r10,1),%ecx
        andl    %esi,%r15d
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 144-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %eax,%r15d
        xorl    %r13d,%r14d
        leal    (%r10,%r15,1),%r10d
        movl    %edx,%r12d
        addl    8(%rsp),%r9d
        andl    %ecx,%r12d
        rorxl   $25,%ecx,%r13d
        rorxl   $11,%ecx,%r15d
        leal    (%r10,%r14,1),%r10d
        leal    (%r9,%r12,1),%r9d
        andnl   %r8d,%ecx,%r12d
        xorl    %r15d,%r13d
        rorxl   $6,%ecx,%r14d
        leal    (%r9,%r12,1),%r9d
        xorl    %r14d,%r13d
        movl    %r10d,%r15d
        rorxl   $22,%r10d,%r12d
        leal    (%r9,%r13,1),%r9d
        xorl    %r11d,%r15d
        rorxl   $13,%r10d,%r14d
        rorxl   $2,%r10d,%r13d
        leal    (%rbx,%r9,1),%ebx
        andl    %r15d,%esi
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 160-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %r11d,%esi
        xorl    %r13d,%r14d
        leal    (%r9,%rsi,1),%r9d
        movl    %ecx,%r12d
        addl    12(%rsp),%r8d
        andl    %ebx,%r12d
        rorxl   $25,%ebx,%r13d
        rorxl   $11,%ebx,%esi
        leal    (%r9,%r14,1),%r9d
        leal    (%r8,%r12,1),%r8d
        andnl   %edx,%ebx,%r12d
        xorl    %esi,%r13d
        rorxl   $6,%ebx,%r14d
        leal    (%r8,%r12,1),%r8d
        xorl    %r14d,%r13d
        movl    %r9d,%esi
        rorxl   $22,%r9d,%r12d
        leal    (%r8,%r13,1),%r8d
        xorl    %r10d,%esi
        rorxl   $13,%r9d,%r14d
        rorxl   $2,%r9d,%r13d
        leal    (%rax,%r8,1),%eax
        andl    %esi,%r15d
        vaesenclast     %xmm10,%xmm9,%xmm11
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 176-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %r10d,%r15d
        xorl    %r13d,%r14d
        leal    (%r8,%r15,1),%r8d
        movl    %ebx,%r12d
        addl    32(%rsp),%edx
        andl    %eax,%r12d
        rorxl   $25,%eax,%r13d
        rorxl   $11,%eax,%r15d
        leal    (%r8,%r14,1),%r8d
        leal    (%rdx,%r12,1),%edx
        andnl   %ecx,%eax,%r12d
        xorl    %r15d,%r13d
        rorxl   $6,%eax,%r14d
        leal    (%rdx,%r12,1),%edx
        xorl    %r14d,%r13d
        movl    %r8d,%r15d
        rorxl   $22,%r8d,%r12d
        leal    (%rdx,%r13,1),%edx
        xorl    %r9d,%r15d
        rorxl   $13,%r8d,%r14d
        rorxl   $2,%r8d,%r13d
        leal    (%r11,%rdx,1),%r11d
        andl    %r15d,%esi
        vpand   %xmm12,%xmm11,%xmm8
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 192-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %r9d,%esi
        xorl    %r13d,%r14d
        leal    (%rdx,%rsi,1),%edx
        movl    %eax,%r12d
        addl    36(%rsp),%ecx
        andl    %r11d,%r12d
        rorxl   $25,%r11d,%r13d
        rorxl   $11,%r11d,%esi
        leal    (%rdx,%r14,1),%edx
        leal    (%rcx,%r12,1),%ecx
        andnl   %ebx,%r11d,%r12d
        xorl    %esi,%r13d
        rorxl   $6,%r11d,%r14d
        leal    (%rcx,%r12,1),%ecx
        xorl    %r14d,%r13d
        movl    %edx,%esi
        rorxl   $22,%edx,%r12d
        leal    (%rcx,%r13,1),%ecx
        xorl    %r8d,%esi
        rorxl   $13,%edx,%r14d
        rorxl   $2,%edx,%r13d
        leal    (%r10,%rcx,1),%r10d
        andl    %esi,%r15d
        vaesenclast     %xmm10,%xmm9,%xmm11
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 208-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %r8d,%r15d
        xorl    %r13d,%r14d
        leal    (%rcx,%r15,1),%ecx
        movl    %r11d,%r12d
        addl    40(%rsp),%ebx
        andl    %r10d,%r12d
        rorxl   $25,%r10d,%r13d
        rorxl   $11,%r10d,%r15d
        leal    (%rcx,%r14,1),%ecx
        leal    (%rbx,%r12,1),%ebx
        andnl   %eax,%r10d,%r12d
        xorl    %r15d,%r13d
        rorxl   $6,%r10d,%r14d
        leal    (%rbx,%r12,1),%ebx
        xorl    %r14d,%r13d
        movl    %ecx,%r15d
        rorxl   $22,%ecx,%r12d
        leal    (%rbx,%r13,1),%ebx
        xorl    %edx,%r15d
        rorxl   $13,%ecx,%r14d
        rorxl   $2,%ecx,%r13d
        leal    (%r9,%rbx,1),%r9d
        andl    %r15d,%esi
        vpand   %xmm13,%xmm11,%xmm11
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 224-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %edx,%esi
        xorl    %r13d,%r14d
        leal    (%rbx,%rsi,1),%ebx
        movl    %r10d,%r12d
        addl    44(%rsp),%eax
        andl    %r9d,%r12d
        rorxl   $25,%r9d,%r13d
        rorxl   $11,%r9d,%esi
        leal    (%rbx,%r14,1),%ebx
        leal    (%rax,%r12,1),%eax
        andnl   %r11d,%r9d,%r12d
        xorl    %esi,%r13d
        rorxl   $6,%r9d,%r14d
        leal    (%rax,%r12,1),%eax
        xorl    %r14d,%r13d
        movl    %ebx,%esi
        rorxl   $22,%ebx,%r12d
        leal    (%rax,%r13,1),%eax
        xorl    %ecx,%esi
        rorxl   $13,%ebx,%r14d
        rorxl   $2,%ebx,%r13d
        leal    (%r8,%rax,1),%r8d
        andl    %esi,%r15d
        vpor    %xmm11,%xmm8,%xmm8
        vaesenclast     %xmm10,%xmm9,%xmm11
        vmovdqu 0-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %ecx,%r15d
        xorl    %r13d,%r14d
        leal    (%rax,%r15,1),%eax
        movl    %r9d,%r12d
        vpextrq $1,%xmm15,%r12
        vmovq   %xmm15,%r13
        movq    552(%rsp),%r15
        addl    %r14d,%eax
        leaq    448(%rsp),%rbp

        vpand   %xmm14,%xmm11,%xmm11
        vpor    %xmm11,%xmm8,%xmm8
        vmovdqu %xmm8,(%r12,%r13,1)
        leaq    16(%r13),%r13

        addl    0(%r15),%eax
        addl    4(%r15),%ebx
        addl    8(%r15),%ecx
        addl    12(%r15),%edx
        addl    16(%r15),%r8d
        addl    20(%r15),%r9d
        addl    24(%r15),%r10d
        addl    28(%r15),%r11d

        movl    %eax,0(%r15)
        movl    %ebx,4(%r15)
        movl    %ecx,8(%r15)
        movl    %edx,12(%r15)
        movl    %r8d,16(%r15)
        movl    %r9d,20(%r15)
        movl    %r10d,24(%r15)
        movl    %r11d,28(%r15)

        cmpq    80(%rbp),%r13
        je      .Ldone_avx2

        xorl    %r14d,%r14d
        movl    %ebx,%esi
        movl    %r9d,%r12d
        xorl    %ecx,%esi
        jmp     .Lower_avx2
.align  16
.Lower_avx2:
        vmovdqu (%r13),%xmm9
        vpinsrq $0,%r13,%xmm15,%xmm15
        addl    0+16(%rbp),%r11d
        andl    %r8d,%r12d
        rorxl   $25,%r8d,%r13d
        rorxl   $11,%r8d,%r15d
        leal    (%rax,%r14,1),%eax
        leal    (%r11,%r12,1),%r11d
        andnl   %r10d,%r8d,%r12d
        xorl    %r15d,%r13d
        rorxl   $6,%r8d,%r14d
        leal    (%r11,%r12,1),%r11d
        xorl    %r14d,%r13d
        movl    %eax,%r15d
        rorxl   $22,%eax,%r12d
        leal    (%r11,%r13,1),%r11d
        xorl    %ebx,%r15d
        rorxl   $13,%eax,%r14d
        rorxl   $2,%eax,%r13d
        leal    (%rdx,%r11,1),%edx
        andl    %r15d,%esi
        vpxor   %xmm10,%xmm9,%xmm9
        vmovdqu 16-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %ebx,%esi
        xorl    %r13d,%r14d
        leal    (%r11,%rsi,1),%r11d
        movl    %r8d,%r12d
        addl    4+16(%rbp),%r10d
        andl    %edx,%r12d
        rorxl   $25,%edx,%r13d
        rorxl   $11,%edx,%esi
        leal    (%r11,%r14,1),%r11d
        leal    (%r10,%r12,1),%r10d
        andnl   %r9d,%edx,%r12d
        xorl    %esi,%r13d
        rorxl   $6,%edx,%r14d
        leal    (%r10,%r12,1),%r10d
        xorl    %r14d,%r13d
        movl    %r11d,%esi
        rorxl   $22,%r11d,%r12d
        leal    (%r10,%r13,1),%r10d
        xorl    %eax,%esi
        rorxl   $13,%r11d,%r14d
        rorxl   $2,%r11d,%r13d
        leal    (%rcx,%r10,1),%ecx
        andl    %esi,%r15d
        vpxor   %xmm8,%xmm9,%xmm9
        xorl    %r12d,%r14d
        xorl    %eax,%r15d
        xorl    %r13d,%r14d
        leal    (%r10,%r15,1),%r10d
        movl    %edx,%r12d
        addl    8+16(%rbp),%r9d
        andl    %ecx,%r12d
        rorxl   $25,%ecx,%r13d
        rorxl   $11,%ecx,%r15d
        leal    (%r10,%r14,1),%r10d
        leal    (%r9,%r12,1),%r9d
        andnl   %r8d,%ecx,%r12d
        xorl    %r15d,%r13d
        rorxl   $6,%ecx,%r14d
        leal    (%r9,%r12,1),%r9d
        xorl    %r14d,%r13d
        movl    %r10d,%r15d
        rorxl   $22,%r10d,%r12d
        leal    (%r9,%r13,1),%r9d
        xorl    %r11d,%r15d
        rorxl   $13,%r10d,%r14d
        rorxl   $2,%r10d,%r13d
        leal    (%rbx,%r9,1),%ebx
        andl    %r15d,%esi
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 32-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %r11d,%esi
        xorl    %r13d,%r14d
        leal    (%r9,%rsi,1),%r9d
        movl    %ecx,%r12d
        addl    12+16(%rbp),%r8d
        andl    %ebx,%r12d
        rorxl   $25,%ebx,%r13d
        rorxl   $11,%ebx,%esi
        leal    (%r9,%r14,1),%r9d
        leal    (%r8,%r12,1),%r8d
        andnl   %edx,%ebx,%r12d
        xorl    %esi,%r13d
        rorxl   $6,%ebx,%r14d
        leal    (%r8,%r12,1),%r8d
        xorl    %r14d,%r13d
        movl    %r9d,%esi
        rorxl   $22,%r9d,%r12d
        leal    (%r8,%r13,1),%r8d
        xorl    %r10d,%esi
        rorxl   $13,%r9d,%r14d
        rorxl   $2,%r9d,%r13d
        leal    (%rax,%r8,1),%eax
        andl    %esi,%r15d
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 48-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %r10d,%r15d
        xorl    %r13d,%r14d
        leal    (%r8,%r15,1),%r8d
        movl    %ebx,%r12d
        addl    32+16(%rbp),%edx
        andl    %eax,%r12d
        rorxl   $25,%eax,%r13d
        rorxl   $11,%eax,%r15d
        leal    (%r8,%r14,1),%r8d
        leal    (%rdx,%r12,1),%edx
        andnl   %ecx,%eax,%r12d
        xorl    %r15d,%r13d
        rorxl   $6,%eax,%r14d
        leal    (%rdx,%r12,1),%edx
        xorl    %r14d,%r13d
        movl    %r8d,%r15d
        rorxl   $22,%r8d,%r12d
        leal    (%rdx,%r13,1),%edx
        xorl    %r9d,%r15d
        rorxl   $13,%r8d,%r14d
        rorxl   $2,%r8d,%r13d
        leal    (%r11,%rdx,1),%r11d
        andl    %r15d,%esi
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 64-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %r9d,%esi
        xorl    %r13d,%r14d
        leal    (%rdx,%rsi,1),%edx
        movl    %eax,%r12d
        addl    36+16(%rbp),%ecx
        andl    %r11d,%r12d
        rorxl   $25,%r11d,%r13d
        rorxl   $11,%r11d,%esi
        leal    (%rdx,%r14,1),%edx
        leal    (%rcx,%r12,1),%ecx
        andnl   %ebx,%r11d,%r12d
        xorl    %esi,%r13d
        rorxl   $6,%r11d,%r14d
        leal    (%rcx,%r12,1),%ecx
        xorl    %r14d,%r13d
        movl    %edx,%esi
        rorxl   $22,%edx,%r12d
        leal    (%rcx,%r13,1),%ecx
        xorl    %r8d,%esi
        rorxl   $13,%edx,%r14d
        rorxl   $2,%edx,%r13d
        leal    (%r10,%rcx,1),%r10d
        andl    %esi,%r15d
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 80-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %r8d,%r15d
        xorl    %r13d,%r14d
        leal    (%rcx,%r15,1),%ecx
        movl    %r11d,%r12d
        addl    40+16(%rbp),%ebx
        andl    %r10d,%r12d
        rorxl   $25,%r10d,%r13d
        rorxl   $11,%r10d,%r15d
        leal    (%rcx,%r14,1),%ecx
        leal    (%rbx,%r12,1),%ebx
        andnl   %eax,%r10d,%r12d
        xorl    %r15d,%r13d
        rorxl   $6,%r10d,%r14d
        leal    (%rbx,%r12,1),%ebx
        xorl    %r14d,%r13d
        movl    %ecx,%r15d
        rorxl   $22,%ecx,%r12d
        leal    (%rbx,%r13,1),%ebx
        xorl    %edx,%r15d
        rorxl   $13,%ecx,%r14d
        rorxl   $2,%ecx,%r13d
        leal    (%r9,%rbx,1),%r9d
        andl    %r15d,%esi
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 96-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %edx,%esi
        xorl    %r13d,%r14d
        leal    (%rbx,%rsi,1),%ebx
        movl    %r10d,%r12d
        addl    44+16(%rbp),%eax
        andl    %r9d,%r12d
        rorxl   $25,%r9d,%r13d
        rorxl   $11,%r9d,%esi
        leal    (%rbx,%r14,1),%ebx
        leal    (%rax,%r12,1),%eax
        andnl   %r11d,%r9d,%r12d
        xorl    %esi,%r13d
        rorxl   $6,%r9d,%r14d
        leal    (%rax,%r12,1),%eax
        xorl    %r14d,%r13d
        movl    %ebx,%esi
        rorxl   $22,%ebx,%r12d
        leal    (%rax,%r13,1),%eax
        xorl    %ecx,%esi
        rorxl   $13,%ebx,%r14d
        rorxl   $2,%ebx,%r13d
        leal    (%r8,%rax,1),%r8d
        andl    %esi,%r15d
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 112-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %ecx,%r15d
        xorl    %r13d,%r14d
        leal    (%rax,%r15,1),%eax
        movl    %r9d,%r12d
        leaq    -64(%rbp),%rbp
        addl    0+16(%rbp),%r11d
        andl    %r8d,%r12d
        rorxl   $25,%r8d,%r13d
        rorxl   $11,%r8d,%r15d
        leal    (%rax,%r14,1),%eax
        leal    (%r11,%r12,1),%r11d
        andnl   %r10d,%r8d,%r12d
        xorl    %r15d,%r13d
        rorxl   $6,%r8d,%r14d
        leal    (%r11,%r12,1),%r11d
        xorl    %r14d,%r13d
        movl    %eax,%r15d
        rorxl   $22,%eax,%r12d
        leal    (%r11,%r13,1),%r11d
        xorl    %ebx,%r15d
        rorxl   $13,%eax,%r14d
        rorxl   $2,%eax,%r13d
        leal    (%rdx,%r11,1),%edx
        andl    %r15d,%esi
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 128-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %ebx,%esi
        xorl    %r13d,%r14d
        leal    (%r11,%rsi,1),%r11d
        movl    %r8d,%r12d
        addl    4+16(%rbp),%r10d
        andl    %edx,%r12d
        rorxl   $25,%edx,%r13d
        rorxl   $11,%edx,%esi
        leal    (%r11,%r14,1),%r11d
        leal    (%r10,%r12,1),%r10d
        andnl   %r9d,%edx,%r12d
        xorl    %esi,%r13d
        rorxl   $6,%edx,%r14d
        leal    (%r10,%r12,1),%r10d
        xorl    %r14d,%r13d
        movl    %r11d,%esi
        rorxl   $22,%r11d,%r12d
        leal    (%r10,%r13,1),%r10d
        xorl    %eax,%esi
        rorxl   $13,%r11d,%r14d
        rorxl   $2,%r11d,%r13d
        leal    (%rcx,%r10,1),%ecx
        andl    %esi,%r15d
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 144-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %eax,%r15d
        xorl    %r13d,%r14d
        leal    (%r10,%r15,1),%r10d
        movl    %edx,%r12d
        addl    8+16(%rbp),%r9d
        andl    %ecx,%r12d
        rorxl   $25,%ecx,%r13d
        rorxl   $11,%ecx,%r15d
        leal    (%r10,%r14,1),%r10d
        leal    (%r9,%r12,1),%r9d
        andnl   %r8d,%ecx,%r12d
        xorl    %r15d,%r13d
        rorxl   $6,%ecx,%r14d
        leal    (%r9,%r12,1),%r9d
        xorl    %r14d,%r13d
        movl    %r10d,%r15d
        rorxl   $22,%r10d,%r12d
        leal    (%r9,%r13,1),%r9d
        xorl    %r11d,%r15d
        rorxl   $13,%r10d,%r14d
        rorxl   $2,%r10d,%r13d
        leal    (%rbx,%r9,1),%ebx
        andl    %r15d,%esi
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 160-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %r11d,%esi
        xorl    %r13d,%r14d
        leal    (%r9,%rsi,1),%r9d
        movl    %ecx,%r12d
        addl    12+16(%rbp),%r8d
        andl    %ebx,%r12d
        rorxl   $25,%ebx,%r13d
        rorxl   $11,%ebx,%esi
        leal    (%r9,%r14,1),%r9d
        leal    (%r8,%r12,1),%r8d
        andnl   %edx,%ebx,%r12d
        xorl    %esi,%r13d
        rorxl   $6,%ebx,%r14d
        leal    (%r8,%r12,1),%r8d
        xorl    %r14d,%r13d
        movl    %r9d,%esi
        rorxl   $22,%r9d,%r12d
        leal    (%r8,%r13,1),%r8d
        xorl    %r10d,%esi
        rorxl   $13,%r9d,%r14d
        rorxl   $2,%r9d,%r13d
        leal    (%rax,%r8,1),%eax
        andl    %esi,%r15d
        vaesenclast     %xmm10,%xmm9,%xmm11
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 176-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %r10d,%r15d
        xorl    %r13d,%r14d
        leal    (%r8,%r15,1),%r8d
        movl    %ebx,%r12d
        addl    32+16(%rbp),%edx
        andl    %eax,%r12d
        rorxl   $25,%eax,%r13d
        rorxl   $11,%eax,%r15d
        leal    (%r8,%r14,1),%r8d
        leal    (%rdx,%r12,1),%edx
        andnl   %ecx,%eax,%r12d
        xorl    %r15d,%r13d
        rorxl   $6,%eax,%r14d
        leal    (%rdx,%r12,1),%edx
        xorl    %r14d,%r13d
        movl    %r8d,%r15d
        rorxl   $22,%r8d,%r12d
        leal    (%rdx,%r13,1),%edx
        xorl    %r9d,%r15d
        rorxl   $13,%r8d,%r14d
        rorxl   $2,%r8d,%r13d
        leal    (%r11,%rdx,1),%r11d
        andl    %r15d,%esi
        vpand   %xmm12,%xmm11,%xmm8
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 192-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %r9d,%esi
        xorl    %r13d,%r14d
        leal    (%rdx,%rsi,1),%edx
        movl    %eax,%r12d
        addl    36+16(%rbp),%ecx
        andl    %r11d,%r12d
        rorxl   $25,%r11d,%r13d
        rorxl   $11,%r11d,%esi
        leal    (%rdx,%r14,1),%edx
        leal    (%rcx,%r12,1),%ecx
        andnl   %ebx,%r11d,%r12d
        xorl    %esi,%r13d
        rorxl   $6,%r11d,%r14d
        leal    (%rcx,%r12,1),%ecx
        xorl    %r14d,%r13d
        movl    %edx,%esi
        rorxl   $22,%edx,%r12d
        leal    (%rcx,%r13,1),%ecx
        xorl    %r8d,%esi
        rorxl   $13,%edx,%r14d
        rorxl   $2,%edx,%r13d
        leal    (%r10,%rcx,1),%r10d
        andl    %esi,%r15d
        vaesenclast     %xmm10,%xmm9,%xmm11
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 208-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %r8d,%r15d
        xorl    %r13d,%r14d
        leal    (%rcx,%r15,1),%ecx
        movl    %r11d,%r12d
        addl    40+16(%rbp),%ebx
        andl    %r10d,%r12d
        rorxl   $25,%r10d,%r13d
        rorxl   $11,%r10d,%r15d
        leal    (%rcx,%r14,1),%ecx
        leal    (%rbx,%r12,1),%ebx
        andnl   %eax,%r10d,%r12d
        xorl    %r15d,%r13d
        rorxl   $6,%r10d,%r14d
        leal    (%rbx,%r12,1),%ebx
        xorl    %r14d,%r13d
        movl    %ecx,%r15d
        rorxl   $22,%ecx,%r12d
        leal    (%rbx,%r13,1),%ebx
        xorl    %edx,%r15d
        rorxl   $13,%ecx,%r14d
        rorxl   $2,%ecx,%r13d
        leal    (%r9,%rbx,1),%r9d
        andl    %r15d,%esi
        vpand   %xmm13,%xmm11,%xmm11
        vaesenc %xmm10,%xmm9,%xmm9
        vmovdqu 224-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %edx,%esi
        xorl    %r13d,%r14d
        leal    (%rbx,%rsi,1),%ebx
        movl    %r10d,%r12d
        addl    44+16(%rbp),%eax
        andl    %r9d,%r12d
        rorxl   $25,%r9d,%r13d
        rorxl   $11,%r9d,%esi
        leal    (%rbx,%r14,1),%ebx
        leal    (%rax,%r12,1),%eax
        andnl   %r11d,%r9d,%r12d
        xorl    %esi,%r13d
        rorxl   $6,%r9d,%r14d
        leal    (%rax,%r12,1),%eax
        xorl    %r14d,%r13d
        movl    %ebx,%esi
        rorxl   $22,%ebx,%r12d
        leal    (%rax,%r13,1),%eax
        xorl    %ecx,%esi
        rorxl   $13,%ebx,%r14d
        rorxl   $2,%ebx,%r13d
        leal    (%r8,%rax,1),%r8d
        andl    %esi,%r15d
        vpor    %xmm11,%xmm8,%xmm8
        vaesenclast     %xmm10,%xmm9,%xmm11
        vmovdqu 0-128(%rdi),%xmm10
        xorl    %r12d,%r14d
        xorl    %ecx,%r15d
        xorl    %r13d,%r14d
        leal    (%rax,%r15,1),%eax
        movl    %r9d,%r12d
        vmovq   %xmm15,%r13
        vpextrq $1,%xmm15,%r15
        vpand   %xmm14,%xmm11,%xmm11
        vpor    %xmm11,%xmm8,%xmm8
        leaq    -64(%rbp),%rbp
        vmovdqu %xmm8,(%r15,%r13,1)
        leaq    16(%r13),%r13
        cmpq    %rsp,%rbp
        jae     .Lower_avx2

        movq    552(%rsp),%r15
        leaq    64(%r13),%r13
        movq    560(%rsp),%rsi
        addl    %r14d,%eax
        leaq    448(%rsp),%rsp

        addl    0(%r15),%eax
        addl    4(%r15),%ebx
        addl    8(%r15),%ecx
        addl    12(%r15),%edx
        addl    16(%r15),%r8d
        addl    20(%r15),%r9d
        addl    24(%r15),%r10d
        leaq    (%rsi,%r13,1),%r12
        addl    28(%r15),%r11d

        cmpq    64+16(%rsp),%r13

        movl    %eax,0(%r15)
        cmoveq  %rsp,%r12
        movl    %ebx,4(%r15)
        movl    %ecx,8(%r15)
        movl    %edx,12(%r15)
        movl    %r8d,16(%r15)
        movl    %r9d,20(%r15)
        movl    %r10d,24(%r15)
        movl    %r11d,28(%r15)

        jbe     .Loop_avx2
        leaq    (%rsp),%rbp


.cfi_escape     0x0f,0x06,0x76,0xf8,0x00,0x06,0x23,0x08

.Ldone_avx2:
        movq    64+32(%rbp),%r8
        movq    64+56(%rbp),%rsi
.cfi_def_cfa    %rsi,8
        vmovdqu %xmm8,(%r8)
        vzeroall
        movq    -48(%rsi),%r15
.cfi_restore    %r15
        movq    -40(%rsi),%r14
.cfi_restore    %r14
        movq    -32(%rsi),%r13
.cfi_restore    %r13
        movq    -24(%rsi),%r12
.cfi_restore    %r12
        movq    -16(%rsi),%rbp
.cfi_restore    %rbp
        movq    -8(%rsi),%rbx
.cfi_restore    %rbx
        leaq    (%rsi),%rsp
.cfi_def_cfa_register   %rsp
.Lepilogue_avx2:
        .byte   0xf3,0xc3
.cfi_endproc
.size   aesni_cbc_sha256_enc_avx2,.-aesni_cbc_sha256_enc_avx2
.type   aesni_cbc_sha256_enc_shaext,@function
.align  32
aesni_cbc_sha256_enc_shaext:
.cfi_startproc
        movq    8(%rsp),%r10
        leaq    K256+128(%rip),%rax
        movdqu  (%r9),%xmm1
        movdqu  16(%r9),%xmm2
        movdqa  512-128(%rax),%xmm3

        movl    240(%rcx),%r11d
        subq    %rdi,%rsi
        movups  (%rcx),%xmm15
        movups  (%r8),%xmm6
        movups  16(%rcx),%xmm4
        leaq    112(%rcx),%rcx

        pshufd  $0x1b,%xmm1,%xmm0
        pshufd  $0xb1,%xmm1,%xmm1
        pshufd  $0x1b,%xmm2,%xmm2
        movdqa  %xmm3,%xmm7
.byte   102,15,58,15,202,8
        punpcklqdq      %xmm0,%xmm2

        jmp     .Loop_shaext

.align  16
.Loop_shaext:
        movdqu  (%r10),%xmm10
        movdqu  16(%r10),%xmm11
        movdqu  32(%r10),%xmm12
.byte   102,68,15,56,0,211
        movdqu  48(%r10),%xmm13

        movdqa  0-128(%rax),%xmm0
        paddd   %xmm10,%xmm0
.byte   102,68,15,56,0,219
        movdqa  %xmm2,%xmm9
        movdqa  %xmm1,%xmm8
        movups  0(%rdi),%xmm14
        xorps   %xmm15,%xmm14
        xorps   %xmm14,%xmm6
        movups  -80(%rcx),%xmm5
        aesenc  %xmm4,%xmm6
.byte   15,56,203,209
        pshufd  $0x0e,%xmm0,%xmm0
        movups  -64(%rcx),%xmm4
        aesenc  %xmm5,%xmm6
.byte   15,56,203,202

        movdqa  32-128(%rax),%xmm0
        paddd   %xmm11,%xmm0
.byte   102,68,15,56,0,227
        leaq    64(%r10),%r10
        movups  -48(%rcx),%xmm5
        aesenc  %xmm4,%xmm6
.byte   15,56,203,209
        pshufd  $0x0e,%xmm0,%xmm0
        movups  -32(%rcx),%xmm4
        aesenc  %xmm5,%xmm6
.byte   15,56,203,202

        movdqa  64-128(%rax),%xmm0
        paddd   %xmm12,%xmm0
.byte   102,68,15,56,0,235
.byte   69,15,56,204,211
        movups  -16(%rcx),%xmm5
        aesenc  %xmm4,%xmm6
.byte   15,56,203,209
        pshufd  $0x0e,%xmm0,%xmm0
        movdqa  %xmm13,%xmm3
.byte   102,65,15,58,15,220,4
        paddd   %xmm3,%xmm10
        movups  0(%rcx),%xmm4
        aesenc  %xmm5,%xmm6
.byte   15,56,203,202

        movdqa  96-128(%rax),%xmm0
        paddd   %xmm13,%xmm0
.byte   69,15,56,205,213
.byte   69,15,56,204,220
        movups  16(%rcx),%xmm5
        aesenc  %xmm4,%xmm6
.byte   15,56,203,209
        pshufd  $0x0e,%xmm0,%xmm0
        movups  32(%rcx),%xmm4
        aesenc  %xmm5,%xmm6
        movdqa  %xmm10,%xmm3
.byte   102,65,15,58,15,221,4
        paddd   %xmm3,%xmm11
.byte   15,56,203,202
        movdqa  128-128(%rax),%xmm0
        paddd   %xmm10,%xmm0
.byte   69,15,56,205,218
.byte   69,15,56,204,229
        movups  48(%rcx),%xmm5
        aesenc  %xmm4,%xmm6
.byte   15,56,203,209
        pshufd  $0x0e,%xmm0,%xmm0
        movdqa  %xmm11,%xmm3
.byte   102,65,15,58,15,218,4
        paddd   %xmm3,%xmm12
        cmpl    $11,%r11d
        jb      .Laesenclast1
        movups  64(%rcx),%xmm4
        aesenc  %xmm5,%xmm6
        movups  80(%rcx),%xmm5
        aesenc  %xmm4,%xmm6
        je      .Laesenclast1
        movups  96(%rcx),%xmm4
        aesenc  %xmm5,%xmm6
        movups  112(%rcx),%xmm5
        aesenc  %xmm4,%xmm6
.Laesenclast1:
        aesenclast      %xmm5,%xmm6
        movups  16-112(%rcx),%xmm4
        nop
.byte   15,56,203,202
        movups  16(%rdi),%xmm14
        xorps   %xmm15,%xmm14
        movups  %xmm6,0(%rsi,%rdi,1)
        xorps   %xmm14,%xmm6
        movups  -80(%rcx),%xmm5
        aesenc  %xmm4,%xmm6
        movdqa  160-128(%rax),%xmm0
        paddd   %xmm11,%xmm0
.byte   69,15,56,205,227
.byte   69,15,56,204,234
        movups  -64(%rcx),%xmm4
        aesenc  %xmm5,%xmm6
.byte   15,56,203,209
        pshufd  $0x0e,%xmm0,%xmm0
        movdqa  %xmm12,%xmm3
.byte   102,65,15,58,15,219,4
        paddd   %xmm3,%xmm13
        movups  -48(%rcx),%xmm5
        aesenc  %xmm4,%xmm6
.byte   15,56,203,202
        movdqa  192-128(%rax),%xmm0
        paddd   %xmm12,%xmm0
.byte   69,15,56,205,236
.byte   69,15,56,204,211
        movups  -32(%rcx),%xmm4
        aesenc  %xmm5,%xmm6
.byte   15,56,203,209
        pshufd  $0x0e,%xmm0,%xmm0
        movdqa  %xmm13,%xmm3
.byte   102,65,15,58,15,220,4
        paddd   %xmm3,%xmm10
        movups  -16(%rcx),%xmm5
        aesenc  %xmm4,%xmm6
.byte   15,56,203,202
        movdqa  224-128(%rax),%xmm0
        paddd   %xmm13,%xmm0
.byte   69,15,56,205,213
.byte   69,15,56,204,220
        movups  0(%rcx),%xmm4
        aesenc  %xmm5,%xmm6
.byte   15,56,203,209
        pshufd  $0x0e,%xmm0,%xmm0
        movdqa  %xmm10,%xmm3
.byte   102,65,15,58,15,221,4
        paddd   %xmm3,%xmm11
        movups  16(%rcx),%xmm5
        aesenc  %xmm4,%xmm6
.byte   15,56,203,202
        movdqa  256-128(%rax),%xmm0
        paddd   %xmm10,%xmm0
.byte   69,15,56,205,218
.byte   69,15,56,204,229
        movups  32(%rcx),%xmm4
        aesenc  %xmm5,%xmm6
.byte   15,56,203,209
        pshufd  $0x0e,%xmm0,%xmm0
        movdqa  %xmm11,%xmm3
.byte   102,65,15,58,15,218,4
        paddd   %xmm3,%xmm12
        movups  48(%rcx),%xmm5
        aesenc  %xmm4,%xmm6
        cmpl    $11,%r11d
        jb      .Laesenclast2
        movups  64(%rcx),%xmm4
        aesenc  %xmm5,%xmm6
        movups  80(%rcx),%xmm5
        aesenc  %xmm4,%xmm6
        je      .Laesenclast2
        movups  96(%rcx),%xmm4
        aesenc  %xmm5,%xmm6
        movups  112(%rcx),%xmm5
        aesenc  %xmm4,%xmm6
.Laesenclast2:
        aesenclast      %xmm5,%xmm6
        movups  16-112(%rcx),%xmm4
        nop
.byte   15,56,203,202
        movups  32(%rdi),%xmm14
        xorps   %xmm15,%xmm14
        movups  %xmm6,16(%rsi,%rdi,1)
        xorps   %xmm14,%xmm6
        movups  -80(%rcx),%xmm5
        aesenc  %xmm4,%xmm6
        movdqa  288-128(%rax),%xmm0
        paddd   %xmm11,%xmm0
.byte   69,15,56,205,227
.byte   69,15,56,204,234
        movups  -64(%rcx),%xmm4
        aesenc  %xmm5,%xmm6
.byte   15,56,203,209
        pshufd  $0x0e,%xmm0,%xmm0
        movdqa  %xmm12,%xmm3
.byte   102,65,15,58,15,219,4
        paddd   %xmm3,%xmm13
        movups  -48(%rcx),%xmm5
        aesenc  %xmm4,%xmm6
.byte   15,56,203,202
        movdqa  320-128(%rax),%xmm0
        paddd   %xmm12,%xmm0
.byte   69,15,56,205,236
.byte   69,15,56,204,211
        movups  -32(%rcx),%xmm4
        aesenc  %xmm5,%xmm6
.byte   15,56,203,209
        pshufd  $0x0e,%xmm0,%xmm0
        movdqa  %xmm13,%xmm3
.byte   102,65,15,58,15,220,4
        paddd   %xmm3,%xmm10
        movups  -16(%rcx),%xmm5
        aesenc  %xmm4,%xmm6
.byte   15,56,203,202
        movdqa  352-128(%rax),%xmm0
        paddd   %xmm13,%xmm0
.byte   69,15,56,205,213
.byte   69,15,56,204,220
        movups  0(%rcx),%xmm4
        aesenc  %xmm5,%xmm6
.byte   15,56,203,209
        pshufd  $0x0e,%xmm0,%xmm0
        movdqa  %xmm10,%xmm3
.byte   102,65,15,58,15,221,4
        paddd   %xmm3,%xmm11
        movups  16(%rcx),%xmm5
        aesenc  %xmm4,%xmm6
.byte   15,56,203,202
        movdqa  384-128(%rax),%xmm0
        paddd   %xmm10,%xmm0
.byte   69,15,56,205,218
.byte   69,15,56,204,229
        movups  32(%rcx),%xmm4
        aesenc  %xmm5,%xmm6
.byte   15,56,203,209
        pshufd  $0x0e,%xmm0,%xmm0
        movdqa  %xmm11,%xmm3
.byte   102,65,15,58,15,218,4
        paddd   %xmm3,%xmm12
        movups  48(%rcx),%xmm5
        aesenc  %xmm4,%xmm6
.byte   15,56,203,202
        movdqa  416-128(%rax),%xmm0
        paddd   %xmm11,%xmm0
.byte   69,15,56,205,227
.byte   69,15,56,204,234
        cmpl    $11,%r11d
        jb      .Laesenclast3
        movups  64(%rcx),%xmm4
        aesenc  %xmm5,%xmm6
        movups  80(%rcx),%xmm5
        aesenc  %xmm4,%xmm6
        je      .Laesenclast3
        movups  96(%rcx),%xmm4
        aesenc  %xmm5,%xmm6
        movups  112(%rcx),%xmm5
        aesenc  %xmm4,%xmm6
.Laesenclast3:
        aesenclast      %xmm5,%xmm6
        movups  16-112(%rcx),%xmm4
        nop
.byte   15,56,203,209
        pshufd  $0x0e,%xmm0,%xmm0
        movdqa  %xmm12,%xmm3
.byte   102,65,15,58,15,219,4
        paddd   %xmm3,%xmm13
        movups  48(%rdi),%xmm14
        xorps   %xmm15,%xmm14
        movups  %xmm6,32(%rsi,%rdi,1)
        xorps   %xmm14,%xmm6
        movups  -80(%rcx),%xmm5
        aesenc  %xmm4,%xmm6
        movups  -64(%rcx),%xmm4
        aesenc  %xmm5,%xmm6
.byte   15,56,203,202

        movdqa  448-128(%rax),%xmm0
        paddd   %xmm12,%xmm0
.byte   69,15,56,205,236
        movdqa  %xmm7,%xmm3
        movups  -48(%rcx),%xmm5
        aesenc  %xmm4,%xmm6
.byte   15,56,203,209
        pshufd  $0x0e,%xmm0,%xmm0
        movups  -32(%rcx),%xmm4
        aesenc  %xmm5,%xmm6
.byte   15,56,203,202

        movdqa  480-128(%rax),%xmm0
        paddd   %xmm13,%xmm0
        movups  -16(%rcx),%xmm5
        aesenc  %xmm4,%xmm6
        movups  0(%rcx),%xmm4
        aesenc  %xmm5,%xmm6
.byte   15,56,203,209
        pshufd  $0x0e,%xmm0,%xmm0
        movups  16(%rcx),%xmm5
        aesenc  %xmm4,%xmm6
.byte   15,56,203,202

        movups  32(%rcx),%xmm4
        aesenc  %xmm5,%xmm6
        movups  48(%rcx),%xmm5
        aesenc  %xmm4,%xmm6
        cmpl    $11,%r11d
        jb      .Laesenclast4
        movups  64(%rcx),%xmm4
        aesenc  %xmm5,%xmm6
        movups  80(%rcx),%xmm5
        aesenc  %xmm4,%xmm6
        je      .Laesenclast4
        movups  96(%rcx),%xmm4
        aesenc  %xmm5,%xmm6
        movups  112(%rcx),%xmm5
        aesenc  %xmm4,%xmm6
.Laesenclast4:
        aesenclast      %xmm5,%xmm6
        movups  16-112(%rcx),%xmm4
        nop

        paddd   %xmm9,%xmm2
        paddd   %xmm8,%xmm1

        decq    %rdx
        movups  %xmm6,48(%rsi,%rdi,1)
        leaq    64(%rdi),%rdi
        jnz     .Loop_shaext

        pshufd  $0xb1,%xmm2,%xmm2
        pshufd  $0x1b,%xmm1,%xmm3
        pshufd  $0xb1,%xmm1,%xmm1
        punpckhqdq      %xmm2,%xmm1
.byte   102,15,58,15,211,8

        movups  %xmm6,(%r8)
        movdqu  %xmm1,(%r9)
        movdqu  %xmm2,16(%r9)
        .byte   0xf3,0xc3
.cfi_endproc
.size   aesni_cbc_sha256_enc_shaext,.-aesni_cbc_sha256_enc_shaext
        .section ".note.gnu.property", "a"
        .p2align 3
        .long 1f - 0f
        .long 4f - 1f
        .long 5
0:
        # "GNU" encoded with .byte, since .asciz isn't supported
        # on Solaris.
        .byte 0x47
        .byte 0x4e
        .byte 0x55
        .byte 0
1:
        .p2align 3
        .long 0xc0000002
        .long 3f - 2f
2:
        .long 3
3:
        .p2align 3
4:
